Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally

2025-02-11 06:05:49 +00:00 · 2022-06-15 18:30:27 +00:00 · 2022-06-15 18:30:27 +00:00 · c2493168b6
commit c2493168b6
parent 76e30ed8ab d69a8f4077
81 changed files with 99537 additions and 4059 deletions
--- a/.gitignore
+++ b/.gitignore
@ -10,6 +10,10 @@ __pycache__/
 addins
 addins/riscv-arch-test/Makefile.include
 addins/riscv-tests/target
+addins/coremark/work/*
+addins/embench/bd_speed/*
+addins/embench/bd_size/*
+benchmarks/embench/wally*.json

 #vsim work files to ignore
 transcript
--- a/.gitmodules
+++ b/.gitmodules
@ -17,6 +17,9 @@
 [submodule "addins/embench-iot"]
 	path = addins/embench-iot
 	url = https://github.com/embench/embench-iot
+[submodule "addins/coremark"]
+	path = addins/coremark
+	url = https://github.com/eembc/coremark
 [submodule "addins/sky130_osu_sc_t18"]
 	path = addins/sky130_osu_sc_t18
 	url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t18
--- a/addins/coremark
+++ b/addins/coremark
@ -0,0 +1 @@
+Subproject commit f3e8f2e0941e42961aadcc52750b1b5577c157c9
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
--- a/benchmarks/coremark/Makefile
+++ b/benchmarks/coremark/Makefile
@ -0,0 +1,29 @@
+# Directory holding the Wally-specific CoreMark port files. Built from
+# $(CURDIR) so the path is still valid inside the sub-make run in $(cmbase).
+PORT_DIR := $(CURDIR)/riscv64-baremetal
+# CoreMark submodule checkout and the directory that receives build products.
+cmbase   := ../../addins/coremark
+work_dir := $(cmbase)/work
+# Files whose modification must trigger a rebuild of the benchmark.
+core_files := core_main.c core_list_join.c coremark.h \
+              core_matrix.c core_state.c core_util.c
+port_files := core_portme.h core_portme.c core_portme.mak \
+              crt.S encoding.h util.h syscalls.c
+sources := $(addprefix $(cmbase)/,$(core_files)) \
+           $(addprefix $(PORT_DIR)/,$(port_files))
+
+# Produce the hex memory image for simulation from the CoreMark ELF:
+#   1. dump a full disassembly next to the ELF,
+#   2. convert the ELF into a 64-bit-wide hex memfile (this rule's target),
+#   3. post-process the disassembly with extractFunctionRadix.sh,
+#   4. run the rv64gc coremark simulation from the regression directory.
+# Each recipe line runs in its own shell, so the cd in step 4 cannot leak into
+# later lines and no "cd back" is needed. The log path is made absolute because
+# the redirection is evaluated after the cd.
+$(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
+	riscv64-unknown-elf-objdump -D $< > $<.elf.objdump
+	riscv64-unknown-elf-elf2hex --bit-width 64 --input $< --output $@
+	extractFunctionRadix.sh $<.elf.objdump
+	cd ../../pipelined/regression && vsim -c -do "do wally-pipelined-batch.do rv64gc coremark" > $(abspath $(@D))/coremark.sim.log
+
+# Build the CoreMark ELF inside the submodule using the bare-metal port in
+# $(PORT_DIR), then move it into the work directory.
+# $(MAKE) is used instead of a literal "make" so that -j/jobserver, -n and
+# other command-line flags propagate correctly to the sub-make.
+$(work_dir)/coremark.bare.riscv: $(sources) Makefile
+    # make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta"
+    # These flags were used by WD on CoreMark
+	$(MAKE) -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "
+    # -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error
+	mkdir -p $(@D)
+	mv $(cmbase)/coremark.bare.riscv $@
+
+
+# Remove every build product that was placed in the work directory.
+.PHONY: clean
+clean:
+	$(RM) $(work_dir)/*
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.c
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c
@ -0,0 +1,385 @@
+/*
+Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Original Author: Shay Gal-on
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "coremark.h"
+#if CALLGRIND_RUN
+#include <valgrind/callgrind.h>
+#endif
+
+#if (MEM_METHOD==MEM_MALLOC)
+#include <malloc.h>
+/* Function: portable_malloc
+	Provide malloc() functionality in a platform specific way.
+*/
+void *portable_malloc(size_t size) {
+	return malloc(size);
+}
+/* Function: portable_free
+	Provide free() functionality in a platform specific way.
+*/
+void portable_free(void *p) {
+	free(p);
+}
+#else
+void *portable_malloc(size_t size) {
+	return NULL;
+}
+void portable_free(void *p) {
+	p=NULL;
+}
+#endif
+
+#if (SEED_METHOD==SEED_VOLATILE)
+#if VALIDATION_RUN
+	volatile ee_s32 seed1_volatile=0x3415;
+	volatile ee_s32 seed2_volatile=0x3415;
+	volatile ee_s32 seed3_volatile=0x66;
+#endif
+#if PERFORMANCE_RUN
+	volatile ee_s32 seed1_volatile=0x0;
+	volatile ee_s32 seed2_volatile=0x0;
+	volatile ee_s32 seed3_volatile=0x66;
+#endif
+#if PROFILE_RUN
+	volatile ee_s32 seed1_volatile=0x8;
+	volatile ee_s32 seed2_volatile=0x8;
+	volatile ee_s32 seed3_volatile=0x8;
+#endif
+	volatile ee_s32 seed4_volatile=ITERATIONS;
+	volatile ee_s32 seed5_volatile=0;
+#endif
+/* Porting: Timing functions
+	How to capture time and convert to seconds must be ported to whatever is supported by the platform.
+	e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
+	Sample implementation for standard time.h and windows.h definitions included.
+*/
+/* Define: TIMER_RES_DIVIDER
+	Divider to trade off timer resolution and total time that can be measured.
+
+	Use lower values to increase resolution, but make sure that overflow does not occur.
+	If there are issues with the return value overflowing, increase this value.
+	*/
+#if USE_CLOCK
+	#define NSECS_PER_SEC CLOCKS_PER_SEC
+	#define EE_TIMER_TICKER_RATE 1000
+	#define CORETIMETYPE clock_t
+	#define GETMYTIME(_t) (*_t=clock())
+	#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
+	#define TIMER_RES_DIVIDER 1
+	#define SAMPLE_TIME_IMPLEMENTATION 1
+#elif defined(_MSC_VER)
+	#define NSECS_PER_SEC 10000000
+	#define EE_TIMER_TICKER_RATE 1000
+	#define CORETIMETYPE FILETIME
+	#define GETMYTIME(_t) GetSystemTimeAsFileTime(_t)
+	#define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER)
+	/* setting to milliseconds resolution by default with MSDEV */
+	#ifndef TIMER_RES_DIVIDER
+	#define TIMER_RES_DIVIDER 1000
+	#endif
+	#define SAMPLE_TIME_IMPLEMENTATION 1
+#elif HAS_TIME_H
+	#define NSECS_PER_SEC 1000000000
+	#define EE_TIMER_TICKER_RATE 1000
+	#define CORETIMETYPE struct timespec
+	#define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t)
+	#define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER)
+	/* setting to 1/1000 of a second resolution by default with linux */
+	#ifndef TIMER_RES_DIVIDER
+	#define TIMER_RES_DIVIDER 1000000
+	#endif
+	#define SAMPLE_TIME_IMPLEMENTATION 1
+#else
+    // Defined for RISCV: fallback branch taken when USE_CLOCK is 0, _MSC_VER is undefined and HAS_TIME_H is 0.
+    #define NSECS_PER_SEC 1000000000 // TODO: What freq are we assuming? (EE_TICKS_PER_SEC below is NSECS_PER_SEC / TIMER_RES_DIVIDER)
+	#define EE_TIMER_TICKER_RATE 1000 // TODO: What is this? (not referenced elsewhere in the visible part of this file)
+	#define CORETIMETYPE clock_t
+    #define read_csr(reg) ({ unsigned long __tmp; \
+       asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
+       __tmp; })
+    #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8) // memory-mapped timer read; presumably the CLINT mtime register - confirm against the SoC memory map
+	#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
+	// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms); the value 10000 used here makes EE_TICKS_PER_SEC 100000.
+	#define TIMER_RES_DIVIDER 10000
+	#define SAMPLE_TIME_IMPLEMENTATION 1
+#endif
+#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
+
+#if SAMPLE_TIME_IMPLEMENTATION
+/** Define Host specific (POSIX), or target specific global time variables. */
+static CORETIMETYPE start_time_val, stop_time_val;
+static unsigned long start_instr_val, stop_instr_val;
+
+/* Function: minstretFunc
+	Sample the MINSTRET CSR and return its current value, i.e. the running
+	count of instructions as reported by the hart.
+*/
+unsigned long minstretFunc(void)
+{
+	return read_csr(minstret);
+}
+
+/* Function: minstretDiff
+	Return the number of machine instructions retired between the MINSTRET
+	sample taken in start_time() and the one taken in stop_time().
+
+	The subtraction is written out directly rather than through MYTIMEDIFF:
+	that macro is tailored to CORETIMETYPE and, in the _MSC_VER and HAS_TIME_H
+	configurations, does not expand to a plain integer difference.
+	Unsigned arithmetic keeps the result correct across one counter wrap-around.
+*/
+unsigned long minstretDiff(void)
+{
+	return stop_instr_val - start_instr_val;
+}
+
+/* Function: start_time
+	This function will be called right before starting the timed portion of the benchmark.
+
+	Implementation may be capturing a system timer (as implemented in the example code)
+	or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
+*/
+void start_time(void) {
+	start_instr_val = minstretFunc();
+	GETMYTIME(start_time_val);
+	//ee_printf("Timer started\n");
+	//ee_printf("  MTIME: %u\n", start_time_val);
+#if CALLGRIND_RUN
+	CALLGRIND_START_INSTRUMENTATION
+#endif
+#if MICA
+    asm volatile("int3");/*1 */
+#endif
+}
+/* Function: stop_time
+	This function will be called right after ending the timed portion of the benchmark.
+
+	Implementation may be capturing a system timer (as implemented in the example code)
+	or other system parameters - e.g. reading the current value of cpu cycles counter.
+*/
+void stop_time(void) {
+#if CALLGRIND_RUN
+	 CALLGRIND_STOP_INSTRUMENTATION
+#endif
+#if MICA
+    asm volatile("int3");/*1 */
+#endif
+	GETMYTIME(stop_time_val);
+	stop_instr_val = minstretFunc();
+	//ee_printf("Timer stopped\n");
+	//ee_printf("  MTIME: %u\n", stop_time_val);
+}
+/* Function: get_time
+	Return an abstract "ticks" number that signifies time on the system.
+
+	Actual value returned may be cpu cycles, milliseconds or any other value,
+	as long as it can be converted to seconds by <time_in_secs>.
+	This methodology is taken to accommodate any hardware or simulated platform.
+	The resolution is controlled by <TIMER_RES_DIVIDER>.
+
+	Besides returning the elapsed ticks between start_time() and stop_time(),
+	this port also prints the elapsed timer ticks, the retired-instruction
+	count and both of them as a "CPI" ratio.
+*/
+CORE_TICKS get_time(void) {
+	CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
+	unsigned long instructions = minstretDiff();
+	/* CORE_TICKS is unsigned long in the bare-metal configuration, so it must be
+	   printed with %lu; the explicit cast keeps the call well-defined for the
+	   other CORE_TICKS typedefs as well. */
+	unsigned long elapsed_ul = (unsigned long)elapsed;
+	ee_printf("   Called get_time\n");
+	ee_printf("    Elapsed MTIME: %lu\n", elapsed_ul);
+	ee_printf("    Elapsed MINSTRET: %lu\n", instructions);
+	ee_printf("    CPI: %lu / %lu\n", elapsed_ul, instructions);
+	return elapsed;
+}
+/* Function: time_in_secs
+	Convert the value returned by get_time to seconds.
+
+	The <secs_ret> type is used to accommodate systems with no support for floating point.
+	Default implementation implemented by the EE_TICKS_PER_SEC macro above.
+
+	The four diagnostic lines print the result both narrowed to float and at
+	full <secs_ret> precision, each as an integer and as a floating-point value.
+	Arguments are cast to the exact type each conversion specifier expects,
+	and the full-precision value (not the float copy) is returned.
+*/
+secs_ret time_in_secs(CORE_TICKS ticks) {
+	secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
+	float retvalint = (float) retval;
+	ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", (int)retvalint);
+	ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", (double)retvalint);
+	ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", (int)retval);
+	ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", (double)retval);
+	return retval;
+}
+#else
+#error "Please implement timing functionality in core_portme.c"
+#endif /* SAMPLE_TIME_IMPLEMENTATION */
+
+ee_u32 default_num_contexts = MULTITHREAD;
+
+/* Function: portable_init
+	Target specific initialization code
+	Test for some common mistakes.
+*/
+void portable_init(core_portable *p, int *argc, char *argv[])
+{
+#if PRINT_ARGS
+	int i;
+	for (i=0; i<*argc; i++) {
+		ee_printf("Arg[%d]=%s\n",i,argv[i]);
+	}
+#endif
+	if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
+		ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
+	}
+	if (sizeof(ee_u32) != 4) {
+		ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
+	}
+#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG))
+	ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n");
+#endif
+
+#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG)
+	int nargs=*argc,i;
+	if ((nargs>1) && (*argv[1]=='M')) {
+		default_num_contexts=parseval(argv[1]+1);
+		if (default_num_contexts>MULTITHREAD)
+			default_num_contexts=MULTITHREAD;
+		/* Shift args since first arg is directed to the portable part and not to coremark main */
+		--nargs;
+		for (i=1; i<nargs; i++)
+			argv[i]=argv[i+1];
+		*argc=nargs;
+	}
+#endif /* sample of potential platform specific init via command line, reset the number of contexts being used if first argument is M<n>*/
+	p->portable_id=1;
+}
+/* Function: portable_fini
+	Target specific final code
+*/
+void portable_fini(core_portable *p)
+{
+	p->portable_id=0;
+}
+
+#if (MULTITHREAD>1)
+
+/* Function: core_start_parallel
+	Start benchmarking in a parallel context.
+
+	Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.
+	Other implementations using MCAPI or other standards can easily be devised.
+*/
+/* Function: core_stop_parallel
+	Stop a parallel context execution of coremark, and gather the results.
+
+	Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.
+	Other implementations using MCAPI or other standards can easily be devised.
+*/
+#if USE_PTHREAD
+ee_u8 core_start_parallel(core_results *res) {
+	return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res);
+}
+ee_u8 core_stop_parallel(core_results *res) {
+	void *retval;
+	return (ee_u8)pthread_join(res->port.thread,&retval);
+}
+#elif USE_FORK
+static int key_id=0;
+ee_u8 core_start_parallel(core_results *res) {
+	key_t key=4321+key_id;
+	key_id++;
+	res->port.pid=fork();
+	res->port.shmid=shmget(key, 8, IPC_CREAT | 0666);
+	if (res->port.shmid<0) {
+		ee_printf("ERROR in shmget!\n");
+	}
+	if (res->port.pid==0) {
+		iterate(res);
+		res->port.shm=shmat(res->port.shmid, NULL, 0);
+		/* copy the validation values to the shared memory area  and quit*/
+		if (res->port.shm == (char *) -1) {
+			ee_printf("ERROR in child shmat!\n");
+		} else {
+			memcpy(res->port.shm,&(res->crc),8);
+			shmdt(res->port.shm);
+		}
+		exit(0);
+	}
+	return 1;
+}
+ee_u8 core_stop_parallel(core_results *res) {
+	int status;
+	pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED);
+	if (wpid != res->port.pid) {
+		ee_printf("ERROR waiting for child.\n");
+		if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid);
+		if (errno == EINTR) ee_printf("errno=Interrupted\n");
+		return 0;
+	}
+	/* after process is done, get the values from the shared memory area */
+	res->port.shm=shmat(res->port.shmid, NULL, 0);
+	if (res->port.shm == (char *) -1) {
+		ee_printf("ERROR in parent shmat!\n");
+		return 0;
+	}
+	memcpy(&(res->crc),res->port.shm,8);
+	shmdt(res->port.shm);
+	return 1;
+}
+#elif USE_SOCKET
+static int key_id=0;
+ee_u8 core_start_parallel(core_results *res) {
+	int bound, buffer_length=8;
+	res->port.sa.sin_family = AF_INET;
+	res->port.sa.sin_addr.s_addr = htonl(0x7F000001);
+	res->port.sa.sin_port = htons(7654+key_id);
+	key_id++;
+	res->port.pid=fork();
+	if (res->port.pid==0) { /* benchmark child */
+		iterate(res);
+		res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+		if (-1 == res->port.sock) /* if socket failed to initialize, exit */   {
+			ee_printf("Error Creating Socket");
+		} else {
+			int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in));
+			if (bytes_sent < 0)
+				ee_printf("Error sending packet: %s\n", strerror(errno));
+			close(res->port.sock); /* close the socket */
+		}
+		exit(0);
+	}
+	/* parent process, open the socket */
+	res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr));
+	if (bound < 0)
+		ee_printf("bind(): %s\n",strerror(errno));
+	return 1;
+}
+ee_u8 core_stop_parallel(core_results *res) {
+	int status;
+	int fromlen=sizeof(struct sockaddr);
+	int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen);
+	if (recsize < 0) {
+		ee_printf("Error in receive: %s\n", strerror(errno));
+		return 0;
+	}
+	pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED);
+	if (wpid != res->port.pid) {
+		ee_printf("ERROR waiting for child.\n");
+		if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid);
+		if (errno == EINTR) ee_printf("errno=Interrupted\n");
+		return 0;
+	}
+	return 1;
+}
+#else /* no standard multicore implementation */
+#error "Please implement multicore functionality in core_portme.c to use multiple contexts."
+#endif /* multithread implementations */
+#endif
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.h
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h
@ -0,0 +1,296 @@
+/*
+Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Original Author: Shay Gal-on
+*/
+
+/* Topic: Description
+	This file contains configuration constants required to execute on different platforms
+*/
+#ifndef CORE_PORTME_H
+#define CORE_PORTME_H
+/************************/
+/* Data types and settings */
+/************************/
+/* Configuration: HAS_FLOAT
+	Define to 1 if the platform supports floating point.
+*/
+#ifndef HAS_FLOAT
+#define HAS_FLOAT 1
+#endif
+/* Configuration: HAS_TIME_H
+	Define to 1 if platform has the time.h header file,
+	and implementation of functions thereof.
+*/
+#ifndef HAS_TIME_H
+#define HAS_TIME_H 0
+#endif
+/* Configuration: USE_CLOCK
+	Define to 1 to time the benchmark with the standard clock() function
+	and CLOCKS_PER_SEC (requires time.h).
+*/
+#ifndef USE_CLOCK
+#define USE_CLOCK 0
+#endif
+/* Configuration: HAS_STDIO
+	Define to 1 if the platform has stdio.h.
+*/
+#ifndef HAS_STDIO
+#define HAS_STDIO 1
+#endif
+/* Configuration: HAS_PRINTF
+	Define to 1 if the platform has stdio.h and implements the printf function.
+*/
+#ifndef HAS_PRINTF
+#define HAS_PRINTF 1
+#endif
+
+/* Configuration: CORE_TICKS
+	Define type of return from the timing functions.
+ */
+#if defined(_MSC_VER)
+#include <windows.h>
+typedef size_t CORE_TICKS;
+#elif HAS_TIME_H
+#include <time.h>
+typedef clock_t CORE_TICKS;
+#else
+/* Configuration: size_t and clock_t
+     Note these need to match the size of the clock output and the xLen the processor supports
+ */
+typedef unsigned long int size_t;
+typedef unsigned long int clock_t;
+typedef clock_t CORE_TICKS;
+#endif
+
+/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
+	Initialize these strings per platform
+*/
+#ifndef COMPILER_VERSION
+ #ifdef __GNUC__
+ #define COMPILER_VERSION "GCC"__VERSION__
+ #else
+ #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
+ #endif
+#endif
+#ifndef COMPILER_FLAGS
+ #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
+#endif
+#ifndef MEM_LOCATION
+ #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)"
+ #define MEM_LOCATION_UNSPEC 1
+#endif
+
+/* Data Types:
+	To avoid compiler issues, define the data types that need to be used for 8b, 16b and 32b in <core_portme.h>.
+
+	*Important*:
+	ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
+*/
+typedef signed short ee_s16;
+typedef unsigned short ee_u16;
+typedef signed int ee_s32;
+typedef double ee_f32;
+typedef unsigned char ee_u8;
+typedef unsigned int ee_u32;
+typedef unsigned long long ee_ptr_int;
+typedef size_t ee_size_t;
+/* align an offset to point to a 32b value */
+#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
+
+/* Configuration: SEED_METHOD
+	Defines method to get seed values that cannot be computed at compile time.
+
+	Valid values:
+	SEED_ARG - from command line.
+	SEED_FUNC - from a system function.
+	SEED_VOLATILE - from volatile variables.
+*/
+#ifndef SEED_METHOD
+#define SEED_METHOD SEED_VOLATILE
+#endif
+
+/* Configuration: MEM_METHOD
+	Defines method to get a block of memory.
+
+	Valid values:
+	MEM_MALLOC - for platforms that implement malloc and have malloc.h.
+	MEM_STATIC - to use a static memory array.
+	MEM_STACK - to allocate the data block on the stack (NYI).
+*/
+#ifndef MEM_METHOD
+#define MEM_METHOD MEM_STATIC
+#endif
+
+/* Configuration: MULTITHREAD
+	Define for parallel execution
+
+	Valid values:
+	1 - only one context (default).
+	N>1 - will execute N copies in parallel.
+
+	Note:
+	If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined.
+
+	Three sample implementations are provided. Use <USE_PTHREAD>, <USE_FORK> or <USE_SOCKET> to enable one of them.
+
+	It is valid to have a different implementation of <core_start_parallel> and <core_stop_parallel> in <core_portme.c>,
+	to fit a particular architecture.
+*/
+#ifndef MULTITHREAD
+#define MULTITHREAD 1
+#endif
+
+/* Configuration: USE_PTHREAD
+	Sample implementation for launching parallel contexts
+	This implementation uses pthread_thread_create and pthread_join.
+
+	Valid values:
+	0 - Do not use pthreads API.
+	1 - Use pthreads API
+
+	Note:
+	This flag only matters if MULTITHREAD has been defined to a value greater than 1.
+*/
+#ifndef USE_PTHREAD
+#define USE_PTHREAD 0
+#endif
+
+/* Configuration: USE_FORK
+	Sample implementation for launching parallel contexts
+	This implementation uses fork, waitpid, shmget,shmat and shmdt.
+
+	Valid values:
+	0 - Do not use fork API.
+	1 - Use fork API
+
+	Note:
+	This flag only matters if MULTITHREAD has been defined to a value greater than 1.
+*/
+#ifndef USE_FORK
+#define USE_FORK 0
+#endif
+
+/* Configuration: USE_SOCKET
+	Sample implementation for launching parallel contexts
+	This implementation uses fork, socket, sendto and recvfrom
+
+	Valid values:
+	0 - Do not use fork and sockets API.
+	1 - Use fork and sockets API
+
+	Note:
+	This flag only matters if MULTITHREAD has been defined to a value greater than 1.
+*/
+#ifndef USE_SOCKET
+#define USE_SOCKET 0
+#endif
+
+/* Configuration: MAIN_HAS_NOARGC
+	Needed if platform does not support getting arguments to main.
+
+	Valid values:
+	0 - argc/argv to main is supported
+	1 - argc/argv to main is not supported
+*/
+#ifndef MAIN_HAS_NOARGC
+#define MAIN_HAS_NOARGC 1
+#endif
+
+/* Configuration: MAIN_HAS_NORETURN
+	Needed if platform does not support returning a value from main.
+
+	Valid values:
+	0 - main returns an int, and return value will be 0.
+	1 - platform does not support returning a value from main
+*/
+#ifndef MAIN_HAS_NORETURN
+#define MAIN_HAS_NORETURN 0
+#endif
+
+/* Variable: default_num_contexts
+	Number of contexts to spawn in multicore context.
+	Override this global value to change number of contexts used.
+
+	Note:
+	This value may not be set higher than the <MULTITHREAD> define.
+
+	To experiment, you can set the <MULTITHREAD> define to the highest value expected, and use argc/argv in the <portable_init> to set this value from the command line.
+*/
+extern ee_u32 default_num_contexts;
+
+#if (MULTITHREAD>1)
+#if USE_PTHREAD
+	#include <pthread.h>
+	#define PARALLEL_METHOD "PThreads"
+#elif USE_FORK
+	#include <unistd.h>
+	#include <errno.h>
+	#include <sys/wait.h>
+	#include <sys/shm.h>
+	#include <string.h> /* for memcpy */
+	#define PARALLEL_METHOD "Fork"
+#elif USE_SOCKET
+	#include <sys/types.h>
+	#include <sys/socket.h>
+	#include <netinet/in.h>
+	#include <arpa/inet.h>
+	#include <sys/wait.h>
+	#include <stdio.h>
+	#include <stdlib.h>
+	#include <string.h>
+	#include <unistd.h>
+	#include <errno.h>
+	#define PARALLEL_METHOD "Sockets"
+#else
+	#define PARALLEL_METHOD "Proprietary"
+	#error "Please implement multicore functionality in core_portme.c to use multiple contexts."
+#endif /* Method for multithreading */
+#endif /* MULTITHREAD > 1 */
+
+typedef struct CORE_PORTABLE_S {
+#if (MULTITHREAD>1)
+	#if USE_PTHREAD
+	pthread_t thread;
+	#elif USE_FORK
+	pid_t pid;
+	int shmid;
+	void *shm;
+	#elif USE_SOCKET
+	pid_t pid;
+	int sock;
+	struct sockaddr_in sa;
+	#endif /* Method for multithreading */
+#endif /* MULTITHREAD>1 */
+	ee_u8	portable_id;
+} core_portable;
+
+/* target specific init/fini */
+void portable_init(core_portable *p, int *argc, char *argv[]);
+void portable_fini(core_portable *p);
+
+#if (SEED_METHOD==SEED_VOLATILE)
+ #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN)
+  #define RUN_TYPE_FLAG 1
+ #else
+  #if (TOTAL_DATA_SIZE==1200)
+   #define PROFILE_RUN 1
+  #else
+   #define PERFORMANCE_RUN 1
+  #endif
+ #endif
+#endif /* SEED_METHOD==SEED_VOLATILE */
+
+#endif /* CORE_PORTME_H */
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.mak
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.mak
@ -0,0 +1,149 @@
+# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Original Author: Shay Gal-on
+
+#File: core_portme.mak
+
+# Flag: RISCVTOOLS
+#	Use this flag to point to your RISCV tools
+RISCVTOOLS=$(RISCV)
+# Flag: RISCVTYPE
+#   Type of toolchain to use
+RISCVTYPE=riscv64-unknown-elf
+# Flag: OUTFLAG
+#	Use this flag to define how to get an executable (e.g -o)
+OUTFLAG= -o
+# Flag: CC
+#	Use this flag to define compiler to use
+# david_harris@hmc.edu 20 Nov 2021 removed hard-coded full path; requires RISCV to point at the toolchain install (see RISCVTOOLS above)
+CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc 
+#CC = $(RISCVTYPE)-gcc
+# Flag: CFLAGS
+#	Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
+#PORT_CFLAGS = -O2 -static -std=gnu99
+PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld
+FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
+CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
+#Flag: LFLAGS_END
+#	Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
+#	Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
+LFLAGS_END +=
+# Flag: PORT_SRCS
+# Port specific source files can be added here
+PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S
+# Flag: LOAD
+#	Define this flag if you need to load to a target, as in a cross compile environment.
+
+# Flag: RUN
+#	Define this flag if running does not consist of simple invocation of the binary.
+#	In a cross compile environment, you need to define this.
+
+#For flashing and using a tera term macro, you could use
+#LOAD = flash ADDR
+#RUN =  ttpmacro coremark.ttl
+
+#For copying to target and executing via SSH connection, you could use
+#LOAD = scp $(OUTFILE)  user@target:~
+#RUN = ssh user@target -c
+
+#For native compilation and execution
+LOAD = echo Loading done
+RUN = spike pk
+
+OEXT = .o
+EXE = .bare.riscv
+
+# Flag: SEPARATE_COMPILE
+# Define if you need to separate compilation from link stage.
+# In this case, you also need to define below how to create an object file, and how to link.
+ifdef SEPARATE_COMPILE
+
+LD		= $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc
+OBJOUT 	= -o
+LFLAGS 	=
+OFLAG 	= -o
+COUT 	= -c
+# Flag: PORT_OBJS
+# Port specific object files can be added here
+PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
+PORT_CLEAN = *$(OEXT)
+
+$(OPATH)%$(OEXT) : %.c
+	$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
+
+endif
+
+# Target: port_prebuild
+# Generate any files that are needed before actual build starts.
+# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
+#  - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
+#  - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
+#  Note - Using REBUILD=1
+#
+# Use make PGO=1 to invoke this sample processing.
+
+ifdef PGO
+ ifeq (,$(findstring $(PGO),gen))
+  PGO_STAGE=build_pgo_gcc
+  CFLAGS+=-fprofile-use
+ endif
+ PORT_CLEAN+=*.gcda *.gcno gmon.out
+endif
+
+.PHONY: port_prebuild
+port_prebuild: $(PGO_STAGE)
+
+.PHONY: build_pgo_gcc
+build_pgo_gcc:
+	$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
+
+# Target: port_postbuild
+# Generate any files that are needed after actual build end.
+# E.g. change format to srec, bin, zip in order to be able to load into flash
+.PHONY: port_postbuild
+port_postbuild:
+
+# Target: port_postrun
+# 	Do platform specific after run stuff.
+#	E.g. reset the board, backup the logfiles etc.
+.PHONY: port_postrun
+port_postrun:
+
+# Target: port_prerun
+# 	Do platform specific before run stuff.
+#	E.g. reset the board, backup the logfiles etc.
+.PHONY: port_prerun
+port_prerun:
+
+# Target: port_postload
+# 	Do platform specific after load stuff.
+#	E.g. reset the reset power to the flash eraser
+.PHONY: port_postload
+port_postload:
+
+# Target: port_preload
+# 	Do platform specific before load stuff.
+#	E.g. reset the reset power to the flash eraser
+.PHONY: port_preload
+port_preload:
+
+# FLAG: OPATH
+# Path to the output folder. Default - current folder.
+OPATH = ./
+MKDIR = mkdir -p
+
+# FLAG: PERL
+# Define perl executable to calculate the geomean if running separate.
+PERL=/usr/bin/perl
--- a/benchmarks/coremark/riscv64-baremetal/crt.S
+++ b/benchmarks/coremark/riscv64-baremetal/crt.S
@ -0,0 +1,237 @@
+# See LICENSE for license details.
+
+#include "encoding.h"
+
+#if __riscv_xlen == 64 /* 64-bit build: registers are saved and restored as 8-byte doublewords */
+# define LREG ld
+# define SREG sd
+# define REGBYTES 8
+#else /* any other XLEN: 4-byte words */
+# define LREG lw
+# define SREG sw
+# define REGBYTES 4
+#endif /* __riscv_xlen == 64 */
+
+  .section ".text.init"
+  .globl _start
+_start: # image entry point (named by ENTRY in link.ld); start by clearing x1..x31
+  li  x1, 0
+  li  x2, 0
+  li  x3, 0
+  li  x4, 0
+  li  x5, 0
+  li  x6, 0
+  li  x7, 0
+  li  x8, 0
+  li  x9, 0
+  li  x10,0
+  li  x11,0
+  li  x12,0
+  li  x13,0
+  li  x14,0
+  li  x15,0
+  li  x16,0
+  li  x17,0
+  li  x18,0
+  li  x19,0
+  li  x20,0
+  li  x21,0
+  li  x22,0
+  li  x23,0
+  li  x24,0
+  li  x25,0
+  li  x26,0
+  li  x27,0
+  li  x28,0
+  li  x29,0
+  li  x30,0
+  li  x31,0
+
+  # enable FPU and accelerator if present
+  li t0, MSTATUS_FS | MSTATUS_XS
+  csrs mstatus, t0 # set (not overwrite) the FS and XS fields in mstatus
+
+  # make sure XLEN agrees with compilation choice
+  li t0, 1
+  slli t0, t0, 31 # t0 = 1 << 31: negative in a 32-bit register, positive in a 64-bit one
+#if __riscv_xlen == 64
+  bgez t0, 1f # built for 64 bits: the value must be non-negative
+#else
+  bltz t0, 1f # built for 32 bits: the value must be negative
+#endif
+2: # mismatch path: keep storing 1 to tohost and never leave this loop
+  li a0, 1
+  sw a0, tohost, t0
+  j 2b
+1:
+
+#ifdef __riscv_flen
+  # initialize FPU if we have one
+  la t0, 1f
+  csrw mtvec, t0 # temporary trap vector = label 1 below, so a trap on the FP writes lands past them
+
+  fssr    x0 # write zero to the FP control and status register
+  fmv.s.x f0, x0
+  fmv.s.x f1, x0
+  fmv.s.x f2, x0
+  fmv.s.x f3, x0
+  fmv.s.x f4, x0
+  fmv.s.x f5, x0
+  fmv.s.x f6, x0
+  fmv.s.x f7, x0
+  fmv.s.x f8, x0
+  fmv.s.x f9, x0
+  fmv.s.x f10,x0
+  fmv.s.x f11,x0
+  fmv.s.x f12,x0
+  fmv.s.x f13,x0
+  fmv.s.x f14,x0
+  fmv.s.x f15,x0
+  fmv.s.x f16,x0
+  fmv.s.x f17,x0
+  fmv.s.x f18,x0
+  fmv.s.x f19,x0
+  fmv.s.x f20,x0
+  fmv.s.x f21,x0
+  fmv.s.x f22,x0
+  fmv.s.x f23,x0
+  fmv.s.x f24,x0
+  fmv.s.x f25,x0
+  fmv.s.x f26,x0
+  fmv.s.x f27,x0
+  fmv.s.x f28,x0
+  fmv.s.x f29,x0
+  fmv.s.x f30,x0
+  fmv.s.x f31,x0
+1:
+#endif
+
+  # initialize trap vector
+  la t0, trap_entry
+  csrw mtvec, t0
+
+  # initialize global pointer
+.option push
+.option norelax
+  la gp, __global_pointer$ # symbol is defined inside .sdata by link.ld
+.option pop
+
+  la  tp, _end + 63
+  and tp, tp, -64 # tp = _end rounded up to a 64-byte boundary
+
+  # get core id
+  csrr a0, mhartid
+  # for now, assume only 1 core
+  li a1, 1
+1:bgeu a0, a1, 1b # any hart with mhartid >= 1 spins here forever
+
+  # give each core 128KB of stack + TLS
+#define STKSHIFT 17 /* 1 << 17 = 128 KiB per hart */
+  sll a2, a0, STKSHIFT
+  add tp, tp, a2 # tp += hartid * 128 KiB
+  add sp, a0, 1
+  sll sp, sp, STKSHIFT
+  add sp, sp, tp # sp = tp + (hartid + 1) * 128 KiB
+
+  j _init # continues in _init(cid, nc) in syscalls.c with a0 = mhartid and a1 = 1
+
+  .align 2
+trap_entry: # installed in mtvec by _start; saves x1..x31, calls handle_trap, restores and returns
+  addi sp, sp, -272 # reserve a 272-byte frame; xN is stored at offset N*REGBYTES, offset 0 is unused
+
+  SREG x1, 1*REGBYTES(sp)
+  SREG x2, 2*REGBYTES(sp)
+  SREG x3, 3*REGBYTES(sp)
+  SREG x4, 4*REGBYTES(sp)
+  SREG x5, 5*REGBYTES(sp)
+  SREG x6, 6*REGBYTES(sp)
+  SREG x7, 7*REGBYTES(sp)
+  SREG x8, 8*REGBYTES(sp)
+  SREG x9, 9*REGBYTES(sp)
+  SREG x10, 10*REGBYTES(sp)
+  SREG x11, 11*REGBYTES(sp)
+  SREG x12, 12*REGBYTES(sp)
+  SREG x13, 13*REGBYTES(sp)
+  SREG x14, 14*REGBYTES(sp)
+  SREG x15, 15*REGBYTES(sp)
+  SREG x16, 16*REGBYTES(sp)
+  SREG x17, 17*REGBYTES(sp)
+  SREG x18, 18*REGBYTES(sp)
+  SREG x19, 19*REGBYTES(sp)
+  SREG x20, 20*REGBYTES(sp)
+  SREG x21, 21*REGBYTES(sp)
+  SREG x22, 22*REGBYTES(sp)
+  SREG x23, 23*REGBYTES(sp)
+  SREG x24, 24*REGBYTES(sp)
+  SREG x25, 25*REGBYTES(sp)
+  SREG x26, 26*REGBYTES(sp)
+  SREG x27, 27*REGBYTES(sp)
+  SREG x28, 28*REGBYTES(sp)
+  SREG x29, 29*REGBYTES(sp)
+  SREG x30, 30*REGBYTES(sp)
+  SREG x31, 31*REGBYTES(sp)
+
+  csrr a0, mcause # first argument: trap cause
+  csrr a1, mepc # second argument: trapping pc
+  mv a2, sp # third argument: base of the saved-register frame
+  jal handle_trap # C handler declared in syscalls.c as handle_trap(cause, epc, regs)
+  csrw mepc, a0 # the handler return value becomes the resume address
+
+  # Remain in M-mode after mret
+  li t0, MSTATUS_MPP
+  csrs mstatus, t0
+
+  LREG x1, 1*REGBYTES(sp)
+  LREG x2, 2*REGBYTES(sp)
+  LREG x3, 3*REGBYTES(sp)
+  LREG x4, 4*REGBYTES(sp)
+  LREG x5, 5*REGBYTES(sp)
+  LREG x6, 6*REGBYTES(sp)
+  LREG x7, 7*REGBYTES(sp)
+  LREG x8, 8*REGBYTES(sp)
+  LREG x9, 9*REGBYTES(sp)
+  LREG x10, 10*REGBYTES(sp)
+  LREG x11, 11*REGBYTES(sp)
+  LREG x12, 12*REGBYTES(sp)
+  LREG x13, 13*REGBYTES(sp)
+  LREG x14, 14*REGBYTES(sp)
+  LREG x15, 15*REGBYTES(sp)
+  LREG x16, 16*REGBYTES(sp)
+  LREG x17, 17*REGBYTES(sp)
+  LREG x18, 18*REGBYTES(sp)
+  LREG x19, 19*REGBYTES(sp)
+  LREG x20, 20*REGBYTES(sp)
+  LREG x21, 21*REGBYTES(sp)
+  LREG x22, 22*REGBYTES(sp)
+  LREG x23, 23*REGBYTES(sp)
+  LREG x24, 24*REGBYTES(sp)
+  LREG x25, 25*REGBYTES(sp)
+  LREG x26, 26*REGBYTES(sp)
+  LREG x27, 27*REGBYTES(sp)
+  LREG x28, 28*REGBYTES(sp)
+  LREG x29, 29*REGBYTES(sp)
+  LREG x30, 30*REGBYTES(sp)
+  LREG x31, 31*REGBYTES(sp)
+
+  addi sp, sp, 272 # release the frame reserved on entry
+  mret
+
+.section ".tdata.begin"
+.globl _tdata_begin
+_tdata_begin: # start marker: link.ld places .tdata.begin first inside .tdata
+
+.section ".tdata.end"
+.globl _tdata_end
+_tdata_end: # end marker: link.ld places .tdata.end last inside .tdata
+
+.section ".tbss.end"
+.globl _tbss_end
+_tbss_end: # end marker: link.ld places .tbss.end last inside .tbss
+
+.section ".tohost","aw",@progbits
+.align 6 # 64-byte alignment
+.globl tohost
+tohost: .dword 0 # written by the XLEN check in _start and by syscall() and tohost_exit() in syscalls.c
+.align 6
+.globl fromhost
+fromhost: .dword 0 # polled and then cleared by syscall() in syscalls.c
--- a/benchmarks/coremark/riscv64-baremetal/encoding.h
+++ b/benchmarks/coremark/riscv64-baremetal/encoding.h
--- a/benchmarks/coremark/riscv64-baremetal/link.ld
+++ b/benchmarks/coremark/riscv64-baremetal/link.ld
@ -0,0 +1,66 @@
+/*======================================================================*/
+/* Proxy kernel linker script                                           */
+/*======================================================================*/
+/* This is the linker script used when building the proxy kernel. */
+
+/*----------------------------------------------------------------------*/
+/* Setup                                                                */
+/*----------------------------------------------------------------------*/
+
+/* The OUTPUT_ARCH command specifies the machine architecture where the
+   argument is one of the names used in the BFD library. More
+   specifically one of the entries in bfd/cpu-mips.c */
+
+OUTPUT_ARCH( "riscv" )
+ENTRY(_start)
+
+/*----------------------------------------------------------------------*/
+/* Sections                                                             */
+/*----------------------------------------------------------------------*/
+
+SECTIONS
+{
+
+  /* text: test code section */
+  . = 0x80000000; /* image base; .text.init (which holds _start in crt.S) is placed first */
+  .text.init : { *(.text.init) }
+
+  . = ALIGN(0x1000); /* 4 KiB boundary for the tohost/fromhost words defined in crt.S */
+  .tohost : { *(.tohost) }
+
+  .text : { *(.text) } /* NOTE(review): matches only input sections named exactly .text, not .text.* */
+
+  /* data segment */
+  .data : { *(.data) }
+
+  .sdata : {
+    __global_pointer$ = . + 0x800; /* gp is biased 0x800 past the start of .sdata; loaded into gp by crt.S */
+    *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*)
+    *(.sdata .sdata.* .gnu.linkonce.s.*)
+  }
+
+  /* bss segment */
+  .sbss : {
+    *(.sbss .sbss.* .gnu.linkonce.sb.*)
+    *(.scommon)
+  }
+  .bss : { *(.bss) }
+
+  /* thread-local data segment */
+  .tdata :
+  {
+    _tls_data = .; /* start of the TLS initialization image that init_tls() in syscalls.c copies from */
+    *(.tdata.begin)
+    *(.tdata)
+    *(.tdata.end)
+  }
+  .tbss :
+  {
+    *(.tbss)
+    *(.tbss.end)
+  }
+
+  /* End of uninitialized data segment */
+  _end = .; /* crt.S rounds this up to 64 bytes and uses it as the base of the per-hart TLS + stack area */
+}
+
--- a/benchmarks/coremark/riscv64-baremetal/syscallbackup.c
+++ b/benchmarks/coremark/riscv64-baremetal/syscallbackup.c
--- a/benchmarks/coremark/riscv64-baremetal/syscalls.c
+++ b/benchmarks/coremark/riscv64-baremetal/syscalls.c
@ -0,0 +1,540 @@
+// See LICENSE for license details.
+
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <limits.h>
+#include <sys/signal.h>
+#include "util.h"
+#include "coremark.h"
+#include <stdlib.h>
+
+#define SYS_write 64
+
+#undef strcmp
+
+extern volatile uint64_t tohost;
+extern volatile uint64_t fromhost;
+
+
+/*
+ * Transmit one character through the memory-mapped serial port: poll the
+ * status byte at 0x10000005 until bit 5 is set, store the character at
+ * 0x10000000, then poll again until bit 5 is set once more.
+ * NOTE(review): the original local names (THR/LSR) and the bit position
+ * suggest a 16550-style UART whose bit 5 means "transmit holding register
+ * empty" - confirm against the platform memory map.
+ */
+void _send_char(char c) {
+  volatile unsigned char *tx_data = (unsigned char *)0x10000000;   /* THR */
+  volatile unsigned char *line_status = (unsigned char *)0x10000005; /* LSR */
+
+  while ((*line_status & 0x20) == 0) {
+    /* wait until the transmitter can accept a byte */
+  }
+  *tx_data = c;
+  while ((*line_status & 0x20) == 0) {
+    /* wait until the byte has left the holding register */
+  }
+}
+
+/* Send each character of the NUL-terminated string p through _send_char()
+ * and return the number of characters sent (terminator excluded). */
+int sendstring(const char *p){
+  int sent = 0;
+
+  for (; *p != '\0'; p++, sent++)
+    _send_char(*p);
+
+  return sent;
+}
+
+static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2) // post a request block through tohost and wait for a reply on fromhost
+{
+  volatile uint64_t magic_mem[8] __attribute__((aligned(64))); // request block: [0]=call number, [1..3]=arguments
+  magic_mem[0] = which;
+  magic_mem[1] = arg0;
+  magic_mem[2] = arg1;
+  magic_mem[3] = arg2;
+  __sync_synchronize(); // make the block visible before publishing its address
+
+  tohost = (uintptr_t)magic_mem; // publish the address of the request block
+  while (fromhost == 0) // NOTE(review): spins forever unless something outside this file writes fromhost - presumably the host or testbench
+    ;
+  fromhost = 0; // acknowledge the reply
+
+  __sync_synchronize();
+  return magic_mem[0]; // slot 0 is read back as the result
+}
+
+// Number of slots in counters[] and counter_names[].
+// _init() indexes counters[3]..counters[12] (one slot per mhpmcounter3..12) and
+// setStats() records 12 CSRs, so at least 13 slots are needed: with fewer,
+// _init() writes past the end of the array and the bounds guard inside
+// setStats() spins forever.
+#define NUM_COUNTERS 13
+static uintptr_t counters[NUM_COUNTERS];
+static char* counter_names[NUM_COUNTERS];
+
+void setStats(int enable) // enable != 0: snapshot the CSRs into counters[]; enable == 0: replace each snapshot by the elapsed delta and record its name
+{
+  int i = 0; // next slot in counters[]
+#define READ_CTR(name) do { \
+    while (i >= NUM_COUNTERS) ; /* bounds guard: hangs here instead of writing past counters[] */ \
+    uintptr_t csr = read_csr(name); \
+    if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
+    counters[i++] = csr; \
+  } while (0)
+
+  READ_CTR(mcycle); // NOTE(review): 12 CSRs are read below, so NUM_COUNTERS must be at least 12 or the guard never exits
+  READ_CTR(minstret);
+  READ_CTR(mhpmcounter3); // lands in slot 2, while _init() uses counters[3] for mhpmcounter3 - NOTE(review): the two indexings disagree
+  READ_CTR(mhpmcounter4);
+  READ_CTR(mhpmcounter5);
+  READ_CTR(mhpmcounter6);
+  READ_CTR(mhpmcounter7);
+  READ_CTR(mhpmcounter8);
+  READ_CTR(mhpmcounter9);
+  READ_CTR(mhpmcounter10);
+  READ_CTR(mhpmcounter11);
+  READ_CTR(mhpmcounter12);  
+
+#undef READ_CTR
+}
+
+void __attribute__((noreturn)) tohost_exit(uintptr_t code) // report an exit code through tohost; declared noreturn
+{
+  tohost = (code << 1) | 1; // exit code shifted left by one with bit 0 set
+  asm ("ecall"); // raise an environment-call trap; NOTE(review): presumably the simulator or testbench ends the run here - confirm
+  exit(0); // exit() in this file calls tohost_exit() again, so reaching this line recurses instead of returning
+}
+
+uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32]) // weak default handler called by trap_entry in crt.S with mcause, mepc and the saved-register frame
+{
+  tohost_exit(1337); // default policy: any trap ends the program with code 1337
+}
+
+void exit(int code) // replacement for the C library exit(): forwards the code to tohost_exit()
+{
+  tohost_exit(code);
+}
+
+void abort() // replacement for the C library abort()
+{
+  exit(128 + SIGABRT); // exit code is 128 plus the SIGABRT signal number
+}
+
+void printstr(const char* s) // hand the NUL-terminated string s to syscall() as SYS_write on descriptor 1
+{
+  syscall(SYS_write, 1, (uintptr_t)s, strlen(s)); // NOTE(review): unlike putchar(), this path uses tohost/fromhost and blocks until fromhost is written
+}
+
+void __attribute__((weak)) thread_entry(int cid, int nc) // weak default; called by _init() before main()
+{
+  // multi-threaded programs override this function.
+  // for the case of single-threaded programs, only let core 0 proceed.
+  while (cid != 0); // any caller with cid != 0 spins here forever
+}
+
+int __attribute__((weak)) main(int argc, char** argv) // weak placeholder used only when the program supplies no main()
+{
+  // single-threaded programs override this function.
+  printstr("Implement main(), foo!\n");
+  return -1; // _init() passes this value to exit()
+}
+
+static void init_tls() // initialize the thread-local block that tp points at: copy the .tdata image, then zero .tbss
+{
+  register void* thread_pointer asm("tp"); // reads the tp register, which _start in crt.S set up
+  extern char _tls_data; // start of the TLS image, defined in link.ld
+  extern __thread char _tdata_begin, _tdata_end, _tbss_end; // section markers defined in crt.S
+  size_t tdata_size = &_tdata_end - &_tdata_begin;
+  memcpy(thread_pointer, &_tls_data, tdata_size);
+  size_t tbss_size = &_tbss_end - &_tdata_end;
+  memset(thread_pointer + tdata_size, 0, tbss_size); // arithmetic on void* (GNU extension): the zeroed area starts right after the copied data
+}
+
+void _init(int cid, int nc) // C entry point jumped to from _start in crt.S: set up TLS, run main(), print counters, exit
+{
+  init_tls();
+  thread_entry(cid, nc);
+
+  // only single-threaded programs should ever get here.
+  int ret = main(0, 0);
+
+  char buf[NUM_COUNTERS * 32] __attribute__((aligned(64))); // 32 bytes budgeted per counter line; the sprintf below does no bounds checking
+  char* pbuf = buf;
+  for (int i = 0; i < NUM_COUNTERS; i++)
+    if (counters[i]) // only non-zero slots (filled by setStats()) are reported
+      pbuf += sprintf(pbuf, "%s = %d\n", counter_names[i], counters[i]); // NOTE(review): counters[i] is uintptr_t but is formatted with %d
+  if (pbuf != buf)
+    printstr(buf);
+  counters[3] = read_csr(mhpmcounter3) - counters[3]; // NOTE(review): indices 3..12 are in bounds only when NUM_COUNTERS >= 13 - check its definition
+  counters[4] = read_csr(mhpmcounter4) - counters[4];
+  counters[5] = read_csr(mhpmcounter5) - counters[5];
+  counters[6] = read_csr(mhpmcounter6) - counters[6];
+  counters[7] = read_csr(mhpmcounter7) - counters[7];
+  counters[8] = read_csr(mhpmcounter8) - counters[8];
+  counters[9] = read_csr(mhpmcounter9) - counters[9];
+  counters[10] = read_csr(mhpmcounter10) - counters[10];
+  counters[11] = read_csr(mhpmcounter11) - counters[11];
+  counters[12] = read_csr(mhpmcounter12) - counters[12];    
+
+  ee_printf("Load Stalls %d\n", counters[3]); // NOTE(review): uintptr_t passed for %d here as well; ee_printf is declared outside this file
+  ee_printf("D-Cache Accesses %d\n", counters[11]);
+  ee_printf("D-Cache Misses %d\n", counters[12]);    
+  ee_printf("Branches %d\n", counters[5]);
+  ee_printf("Branches Miss Predictions %d\n", counters[4]);
+  ee_printf("BTB Misses %d\n", counters[6]);
+  ee_printf("Jump, JAL, JALR %d\n", counters[7]);
+  ee_printf("RAS Wrong %d\n", counters[8]);
+  ee_printf("Returns %d\n", counters[9]);
+  ee_printf("BP Class Wrong %d\n", counters[10]);
+  ee_printf("Done printing performance counters\n");
+
+  exit(ret); // the exit code is whatever main() returned
+}
+
+#undef putchar
+int putchar(int ch) // replacement putchar: sends ch straight to _send_char(); the buffered SYS_write version is kept below, commented out
+{
+  /*static __thread char buf[64] __attribute__((aligned(64)));
+  static __thread int buflen = 0;
+
+  buf[buflen++] = ch;
+
+  if (ch == '\n' || buflen == sizeof(buf))
+  {
+    syscall(SYS_write, 1, (uintptr_t)buf, buflen);
+    buflen = 0;
+  }
+
+  return 0;*/
+  _send_char(ch);
+  return 0; // always 0, not the character written as the standard putchar returns
+
+}
+
+// Print x as exactly 16 lowercase hexadecimal digits (zero-padded, no prefix)
+// by building the text in a local buffer and passing it to printstr().
+void printhex(uint64_t x)
+{
+  static const char hexdigits[] = "0123456789abcdef";
+  char text[17];
+
+  // Fill from the least-significant nibble (last character) backwards.
+  for (int slot = 15; slot >= 0; slot--, x >>= 4)
+    text[slot] = hexdigits[x & 0xF];
+  text[16] = '\0';
+
+  printstr(text);
+}
+
+// Emit num in the given base through putch, preceded by enough copies of padc
+// to reach at least width characters. Digits above 9 use lowercase letters.
+static inline void printnum(void (*putch)(int, void**), void **putdat,
+                    unsigned long long num, unsigned base, int width, int padc)
+{
+  unsigned digits[sizeof(num)*CHAR_BIT];  // least-significant digit first
+  int count = 0;
+
+  // Always produces at least one digit, so zero prints as "0".
+  do {
+    digits[count++] = num % base;
+    num /= base;
+  } while (num != 0);
+
+  for (; width > count; width--)
+    putch(padc, putdat);
+
+  for (int idx = count - 1; idx >= 0; idx--) {
+    unsigned d = digits[idx];
+    putch(d < 10 ? d + '0' : d + 'a' - 10, putdat);
+  }
+}
+
+// Fetch the next variadic argument as an unsigned value whose C type is picked
+// by the number of 'l' flags seen: none -> unsigned int, two or more ->
+// unsigned long long, anything else -> unsigned long.
+static unsigned long long getuint(va_list *ap, int lflag)
+{
+  if (lflag == 0)
+    return va_arg(*ap, unsigned int);
+  if (lflag < 2)
+    return va_arg(*ap, unsigned long);
+  return va_arg(*ap, unsigned long long);
+}
+
+// Signed counterpart of getuint(): fetch the next variadic argument as int,
+// long or long long depending on how many 'l' flags were seen.
+static long long getint(va_list *ap, int lflag)
+{
+  if (lflag == 0)
+    return va_arg(*ap, int);
+  if (lflag < 2)
+    return va_arg(*ap, long);
+  return va_arg(*ap, long long);
+}
+
+static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap) // formatting engine shared by printf() and sprintf(): every output character goes through putch(ch, putdat)
+{
+  register const char* p;
+  const char* last_fmt;
+  register int ch, err; // err is never used
+  unsigned long long num;
+  int base, lflag, width, precision, altflag;
+  char padc;
+
+  while (1) {
+    while ((ch = *(unsigned char *) fmt) != '%') { // copy literal text up to the next '%'
+      if (ch == '\0')
+        return; // end of format string
+      fmt++;
+      putch(ch, putdat);
+    }
+    fmt++;
+
+    // Process a %-escape sequence
+    last_fmt = fmt; // remembered so an unknown conversion can be echoed literally
+    padc = ' ';
+    width = -1;
+    precision = -1;
+    lflag = 0;
+    altflag = 0; // set by '#' but not consulted by any conversion below
+  reswitch:
+    switch (ch = *(unsigned char *) fmt++) {
+
+    // flag to pad on the right
+    case '-':
+      padc = '-'; // NOTE(review): printnum() uses padc as the fill character, so "%-5d" is filled with '-' rather than left-justified
+      goto reswitch;
+      
+    // flag to pad with 0's instead of spaces
+    case '0':
+      padc = '0';
+      goto reswitch;
+
+    // width field
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      for (precision = 0; ; ++fmt) { // the number is accumulated in precision first; process_precision moves it to width when width is unset
+        precision = precision * 10 + ch - '0';
+        ch = *fmt;
+        if (ch < '0' || ch > '9')
+          break;
+      }
+      goto process_precision;
+
+    case '*':
+      precision = va_arg(ap, int); // width or precision supplied as an int argument
+      goto process_precision;
+
+    case '.':
+      if (width < 0)
+        width = 0; // marks width as set so that the following number stays in precision
+      goto reswitch;
+
+    case '#':
+      altflag = 1;
+      goto reswitch;
+
+    process_precision:
+      if (width < 0)
+        width = precision, precision = -1;
+      goto reswitch;
+
+    // long flag (doubled for long long)
+    case 'l':
+      lflag++;
+      goto reswitch;
+
+    // character
+    case 'c':
+      putch(va_arg(ap, int), putdat);
+      break;
+
+    // string
+    case 's':
+      if ((p = va_arg(ap, char *)) == NULL)
+        p = "(null)";
+      if (width > 0 && padc != '-')
+        for (width -= strnlen(p, precision); width > 0; width--) // left padding; precision -1 converts to the largest size_t, i.e. no limit
+          putch(padc, putdat);
+      for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) { // at most precision characters when one was given
+        putch(ch, putdat);
+        p++;
+      }
+      for (; width > 0; width--) // right padding, reached with width left over only when the '-' flag was used
+        putch(' ', putdat);
+      break;
+
+    // (signed) decimal
+    case 'd':
+      num = getint(&ap, lflag);
+      if ((long long) num < 0) {
+        putch('-', putdat); // the sign is emitted before any padding and is not counted against width
+        num = -(long long) num;
+      }
+      base = 10;
+      goto signed_number;
+
+    // unsigned decimal
+    case 'u':
+      base = 10;
+      goto unsigned_number;
+
+    // (unsigned) octal
+    case 'o':
+      // should do something with padding so it's always 3 octits
+      base = 8;
+      goto unsigned_number;
+
+    // pointer
+    case 'p':
+      static_assert(sizeof(long) == sizeof(void*)); // macro from util.h
+      lflag = 1; // pointers are fetched as unsigned long
+      putch('0', putdat);
+      putch('x', putdat);
+      /* fall through to 'x' */
+
+    // (unsigned) hexadecimal
+    case 'X': // shares the 'x' path: printnum() only produces lowercase digits
+    case 'x':
+      base = 16;
+    unsigned_number:
+      num = getuint(&ap, lflag);
+    signed_number:
+      printnum(putch, putdat, num, base, width, padc);
+      break;
+
+    // escaped '%' character
+    case '%':
+      putch(ch, putdat);
+      break;
+      
+    // unrecognized escape sequence - just print it literally
+    default:
+      putch('%', putdat);
+      fmt = last_fmt; // rewind so the characters after '%' are copied as plain text
+      break;
+    }
+  }
+}
+
+int printf(const char* fmt, ...) // replacement printf: formats through vprintfmt() and emits each character with putchar()
+{
+  va_list ap;
+  va_start(ap, fmt);
+
+  vprintfmt((void*)putchar, 0, fmt, ap); // putchar is passed through a void* cast as the putch callback; putdat is 0
+
+  va_end(ap);
+  return 0; // incorrect return value, but who cares, anyway?
+}
+
+// Write the NUL-terminated string s followed by a newline, one character at a
+// time through putchar(). Always returns 0.
+int puts(const char* s)
+{
+  // Emit s verbatim. Passing it to printf() as the format string would treat
+  // any '%' in the text as a conversion and read variadic arguments that
+  // were never supplied.
+  while (*s)
+    putchar(*s++);
+  putchar('\n');
+  return 0; // incorrect return value, but who cares, anyway?
+}
+
+int sprintf(char* str, const char* fmt, ...) // replacement sprintf: formats into str with no bound on the output length; returns the characters written, terminator excluded
+{
+  va_list ap;
+  char* str0 = str; // start of the buffer, used to compute the return value
+  va_start(ap, fmt);
+
+  void sprintf_putch(int ch, void** data) // nested function (GNU C extension) used as the putch callback
+  {
+    char** pstr = (char**)data;
+    **pstr = ch; // store the character and advance the caller's write pointer
+    (*pstr)++;
+  }
+
+  vprintfmt(sprintf_putch, (void**)&str, fmt, ap); // str itself is advanced through the putdat pointer
+  *str = 0;
+
+  va_end(ap);
+  return str - str0;
+}
+
+void* memcpy(void* dest, const void* src, size_t len) // replacement memcpy: word-wise when dest, src and len are all multiples of sizeof(uintptr_t), byte-wise otherwise
+{
+  if ((((uintptr_t)dest | (uintptr_t)src | len) & (sizeof(uintptr_t)-1)) == 0) { // OR-ing the three values tests all alignments at once
+    const uintptr_t* s = src;
+    uintptr_t *d = dest;
+    while (d < (uintptr_t*)(dest + len)) // dest + len is arithmetic on void* (GNU extension, byte units)
+      *d++ = *s++;
+  } else {
+    const char* s = src;
+    char *d = dest;
+    while (d < (char*)(dest + len))
+      *d++ = *s++;
+  }
+  return dest;
+}
+void* memset(void* dest, int byte, size_t len) // replacement memset: word-wise when dest and len are multiples of sizeof(uintptr_t), byte-wise otherwise
+{
+  if ((((uintptr_t)dest | len) & (sizeof(uintptr_t)-1)) == 0) {
+    uintptr_t word = byte & 0xFF; // replicate the low byte across the whole word
+    word |= word << 8;
+    word |= word << 16;
+    word |= word << 16 << 16; // two 16-bit shifts keep the shift count below the width of a 32-bit uintptr_t
+
+    uintptr_t *d = dest;
+    while (d < (uintptr_t*)(dest + len)){ // dest + len is arithmetic on void* (GNU extension, byte units)
+      *d = word;
+      d++;}
+  } else {
+    char *d = dest;
+    while (d < (char*)(dest + len)){
+      *d = byte;
+      d++;}
+  }
+  return dest;
+}
+
+// Replacement strlen: number of characters in s before the terminating NUL.
+size_t strlen(const char *s)
+{
+  size_t length = 0;
+  while (s[length] != '\0')
+    length++;
+  return length;
+}
+
+// Replacement strnlen: length of s, but never examining more than n characters.
+size_t strnlen(const char *s, size_t n)
+{
+  size_t count = 0;
+  // The bound is tested first so s[n] is never read.
+  while (count < n && s[count] != '\0')
+    count++;
+  return count;
+}
+
+// Replacement strcmp: compares the strings as sequences of unsigned chars and
+// returns the difference of the first pair that differs (0 when equal).
+int strcmp(const char* s1, const char* s2)
+{
+  const unsigned char *left = (const unsigned char *)s1;
+  const unsigned char *right = (const unsigned char *)s2;
+
+  for (; *left != 0 && *left == *right; left++, right++)
+    ;
+
+  return *left - *right;
+}
+
+// Replacement strcpy: copies src, including its terminating NUL, into dest and
+// returns dest.
+char* strcpy(char* dest, const char* src)
+{
+  size_t i = 0;
+  char ch;
+
+  do {
+    ch = src[i];
+    dest[i] = ch;
+    i++;
+  } while (ch != '\0');
+
+  return dest;
+}
+
+// Replacement atol: skip leading spaces, accept one optional '+' or '-', then
+// convert the run of decimal digits that follows. Returns 0 when there are no
+// digits. Overflow is not detected.
+long atol(const char* str)
+{
+  long res = 0;
+  int sign = 0;
+
+  while (*str == ' ')
+    str++;
+
+  if (*str == '-' || *str == '+') {
+    sign = *str == '-';
+    str++;
+  }
+
+  // Stop at the first non-digit. Running to the terminating NUL would fold
+  // trailing characters such as a newline or a unit suffix into the result.
+  while (*str >= '0' && *str <= '9') {
+    res *= 10;
+    res += *str++ - '0';
+  }
+
+  return sign ? -res : res;
+}
--- a/benchmarks/coremark/riscv64-baremetal/util.h
+++ b/benchmarks/coremark/riscv64-baremetal/util.h
@ -0,0 +1,90 @@
+// See LICENSE for license details.
+
+#ifndef __UTIL_H
+#define __UTIL_H
+
+extern void setStats(int enable);
+
+#include <stdint.h>
+
+#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
+
+static int verify(int n, const volatile int* test, const int* verify) // compare n ints; returns 0 when all match, otherwise the 1-based position of the first mismatch
+{
+  int i;
+  // Unrolled for faster verification
+  for (i = 0; i < n/2*2; i+=2) // n/2*2 is n rounded down to an even count
+  {
+    int t0 = test[i], t1 = test[i+1]; // both volatile reads happen before either comparison
+    int v0 = verify[i], v1 = verify[i+1];
+    if (t0 != v0) return i+1;
+    if (t1 != v1) return i+2;
+  }
+  if (n % 2 != 0 && test[n-1] != verify[n-1]) // leftover element when n is odd
+    return n;
+  return 0;
+}
+
+static int verifyDouble(int n, const volatile double* test, const double* verify) // same contract as verify() for doubles, compared with exact ==
+{
+  int i;
+  // Unrolled for faster verification
+  for (i = 0; i < n/2*2; i+=2)
+  {
+    double t0 = test[i], t1 = test[i+1];
+    double v0 = verify[i], v1 = verify[i+1];
+    int eq1 = t0 == v0, eq2 = t1 == v1;
+    if (!(eq1 & eq2)) return i+1+eq1; // eq1 == 0: first of the pair differs (i+1); eq1 == 1: only the second differs (i+2)
+  }
+  if (n % 2 != 0 && test[n-1] != verify[n-1]) // leftover element when n is odd
+    return n;
+  return 0;
+}
+
+static void __attribute__((noinline)) barrier(int ncores) // spin barrier: returns once ncores callers have arrived
+{
+  static volatile int sense; // value published by the last arrival of each round
+  static volatile int count; // callers that have arrived in the current round
+  static __thread int threadsense; // per-thread value, flipped on every call
+
+  __sync_synchronize();
+
+  threadsense = !threadsense;
+  if (__sync_fetch_and_add(&count, 1) == ncores-1) // the old count equals ncores-1 only for the last arrival
+  {
+    count = 0; // reset for the next round before releasing the waiters
+    sense = threadsense;
+  }
+  else while(sense != threadsense) // every other caller spins until the last arrival publishes the new sense
+    ;
+
+  __sync_synchronize();
+}
+
+// One step of a shift register: x moves right by one bit and the XOR of its
+// two lowest bits is inserted at bit 62.
+static uint64_t lfsr(uint64_t x)
+{
+  uint64_t shifted = x >> 1;
+  uint64_t feedback = ((x ^ shifted) & 1) << 62;
+  return shifted | feedback;
+}
+
+// Length in bytes of the instruction at address pc, taken from its low 16 bits.
+// In the RISC-V base encoding only instructions whose two lowest bits are 11
+// are 32 bits wide; 00, 01 and 10 are the three 16-bit compressed quadrants.
+// Testing (insn & 3) for non-zero would report quadrants 01 and 10 as 4 bytes.
+static uintptr_t insn_len(uintptr_t pc)
+{
+  return ((*(unsigned short*)pc & 3) == 3) ? 4 : 2;
+}
+
+#ifdef __riscv
+#include "encoding.h"
+#endif
+
+#define stringify_1(s) #s
+#define stringify(s) stringify_1(s) /* two levels so that a macro argument is expanded before it is turned into a string */
+#define stats(code, iter) do { /* run code once and print its cycle count, cycles per iteration and CPI */ \
+    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); /* start from minus the current counter values */ \
+    code; \
+    _c += read_csr(mcycle), _i += read_csr(minstret); /* now the elapsed cycles and retired instructions */ \
+    if (cid == 0) /* cid is not a macro parameter: it must be in scope where stats() is expanded */ \
+      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
+             stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); /* one decimal digit is produced with integer arithmetic */ \
+  } while(0)
+
+#endif //__UTIL_H
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@ -2,6 +2,8 @@
 # Expanded and developed by Daniel Torres dtorres@hmc.edu
 # Compile Embench for Wally

+embench_dir = ../../addins/embench-iot
+
 all: sim size

 allClean: clean all
@ -10,12 +12,12 @@ build: buildspeed buildsize

 # uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed
 buildspeed:
-	../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles" 
-	find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
+	$(embench_dir)/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles" 
+	find $(embench_dir)/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done

 # uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for size
 buildsize:
-	../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
+	$(embench_dir)/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"

 # builds dependencies, then launches modelsim and finally runs python wrapper script to present results
 sim: modelsim_build_memfile modelsim_run speed
@ -27,36 +29,36 @@ modelsim_run:

 # builds the objdump based on the compiled c elf files
 objdump: buildspeed
-	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
+	find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done

 # build memfiles, objdump.lab and objdump.addr files
 modelsim_build_memfile: objdump
-	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
-	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
+	find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
+	find $(embench_dir)/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done

 # builds the tests for speed, runs them on spike and then launches python script to present results
 # note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim
 # you'll need to manually remove one of the two .output files, or run make clean
-spike: buildspeed spikecmd speed
+spike: buildspeed objdump spike_run speed

 # command to run spike on all of the benchmarks
-spike_run: buildspeed
-	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
+spike_run:
+	find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done

 # python wrapper to present results of embench size benchmark
 size: buildsize
-	../../addins/embench-iot/benchmark_size.py --builddir=bd_size
+	$(embench_dir)/benchmark_size.py --builddir=bd_size --json-output > wallySize.json

 # python wrapper to present results of embench speed benchmark
 speed:
-	../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1
+	$(embench_dir)/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeed.json

 # deletes all files
 clean: 
-	rm -rf ../../addins/embench-iot/bd_speed/
-	rm -rf ../../addins/embench-iot/bd_size/
+	rm -rf $(embench_dir)/bd_speed/
+	rm -rf $(embench_dir)/bd_size/

 allclean: clean
-	rm -rf ../../addins/embench-iot/logs/
+	rm -rf $(embench_dir)/logs/

 # riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
--- a/benchmarks/graphGen.py
+++ b/benchmarks/graphGen.py
@ -0,0 +1,81 @@
+#!/usr/bin/env python
+import subprocess
+import sys
+import json
+import plotly.graph_objects as go
+
+coremarkData = {}
+embenchData = {}
+debug = True
+
+def loadCoremark():
+    """Load CoreMark results from the simulation log into ``coremarkData``.
+
+    For every keyword, the last log line containing it is used, and the value is
+    the first space-delimited token after the first ':' on that line (the field
+    ``grep KEY | cut -d ':' -f 2 | cut -d ' ' -f 2 | tail -1`` would select).
+    Values are stored as ``int`` when they parse as one and as ``float``
+    otherwise, so a fractional score is accepted. A missing log, a missing
+    keyword or a non-numeric field is reported on stderr and skipped instead of
+    aborting the whole script.
+
+    Returns:
+        dict: the module-level ``coremarkData`` mapping keyword -> number.
+    """
+    coremarkPath = "riscv-coremark/work/coremark.sim.log"
+
+    keywordlist = ["CoreMark 1.0", "CoreMark Size", "MTIME", "MINSTRET", "Branches Miss Predictions", "BTB Misses"]
+    try:
+        with open(coremarkPath, errors="replace") as logFile:
+            logLines = logFile.read().splitlines()
+    except OSError as err:
+        print("loadCoremark: cannot read " + coremarkPath + ": " + str(err), file=sys.stderr)
+        logLines = []
+    for keyword in keywordlist:
+        matches = [line for line in logLines if keyword in line]
+        token = ""
+        if matches and ":" in matches[-1]:
+            # text after the first ':' looks like " 123 ..."; element 0 of the split is the empty string before the space
+            fields = matches[-1].split(":")[1].split(" ")
+            if len(fields) > 1:
+                token = fields[1]
+        if (debug): print(keyword, repr(token))
+        try:
+            coremarkData[keyword] = int(token)
+        except ValueError:
+            try:
+                coremarkData[keyword] = float(token)
+            except ValueError:
+                print("loadCoremark: no numeric value found for \"" + keyword + "\"", file=sys.stderr)
+    if (debug): print(coremarkData)
+    return coremarkData
+
+def loadEmbench():
+    """Load the Embench speed results written to ``embench/wallySpeed.json``.
+
+    The file is opened in a ``with`` block so the handle is closed even when
+    the JSON fails to parse.
+
+    Returns:
+        The decoded JSON document (a local value; the module-level
+        ``embenchData`` is not modified).
+    """
+    embenchPath = "embench/wallySpeed.json"
+    with open(embenchPath) as f:
+        embenchData = json.load(f)
+    if (debug): print(embenchData)
+    return embenchData
+
+def graphEmbench(embenchData):
+    """Show a horizontal bar chart of the Embench speed results.
+
+    One bar is drawn per entry of ``"detailed speed results"``, followed by the
+    geometric mean, geometric sd and geometric range entries of
+    ``embenchData["speed results"]``.
+    """
+    speed = embenchData["speed results"]
+    detailed = speed["detailed speed results"]
+    summaryKeys = ["speed geometric mean", "speed geometric sd", "speed geometric range"]
+
+    labels = list(detailed.keys()) + summaryKeys
+    scores = list(detailed.values()) + [speed[key] for key in summaryKeys]
+
+    fig = go.Figure(go.Bar(y=labels, x=scores, orientation='h'))
+    fig.show()
+
+
+def main():  # load both result sets, then plot the Embench data; returns None, so sys.exit(main()) exits with status 0
+    coremarkData = loadCoremark()  # loaded but not used by anything below
+    embenchData = loadEmbench()
+    graphEmbench(embenchData)
+
+if __name__ == '__main__':
+    sys.exit(main())
+
+# x = 
+# y = 
+
+# df = px.data.tips()
+# fig = px.bar(df, x="total_bill", y="day", orientation='h')
+# fig.show()
+# import plotly.express as px
+
+
+# result = sp.run(['ls', '-l'], stdout=sp.PIPE)
+# result.stdout
+
+# fig = go.Figure( go.Bar(
+#                 x=[],
+#                 y=[],
+#                 color="species",
+#                 facet_col="species", 
+#                 title="Using update_traces() With Plotly Express Figures"),
+#                 orientation='h')
+
+# fig.show()
+
+#
+# "ls -Art ../addins/embench-iot/logs/*speed* | tail -n 1 " # gets most recent embench speed log
+# "ls -Art ../addins/embench-iot/logs/*size* | tail -n 1 " # gets most recent embench speed log
+
+## get coremark score
+
+# cat coremarkPath | grep "CoreMark 1.0" | cut -d ':' -f 2 | cut -d " " -f 2
+# cat coremarkPath | grep "MTIME" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
+# cat coremarkPath | grep "MINSTRET" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
--- a/benchmarks/riscv-coremark/riscv64-baremetal/syscalls.c
+++ b/benchmarks/riscv-coremark/riscv64-baremetal/syscalls.c
@ -166,17 +166,17 @@ void _init(int cid, int nc)
  counters[11] = read_csr(mhpmcounter11) - counters[11];
  counters[12] = read_csr(mhpmcounter12) - counters[12];    

-  ee_printf("Load Stalls %d\n", counters[3]);
-  ee_printf("D-Cache Accesses %d\n", counters[11]);
-  ee_printf("D-Cache Misses %d\n", counters[12]);    
-  ee_printf("Branches %d\n", counters[5]);
-  ee_printf("Branches Miss Predictions %d\n", counters[4]);
-  ee_printf("BTB Misses %d\n", counters[6]);
-  ee_printf("Jump, JAL, JALR %d\n", counters[7]);
-  ee_printf("RAS Wrong %d\n", counters[8]);
-  ee_printf("Returns %d\n", counters[9]);
-  ee_printf("BP Class Wrong %d\n", counters[10]);
-  ee_printf("Done printing performance counters\n");
+  ee_printf("Load Stalls : %d\n", counters[3]);
+  ee_printf("D-Cache Accesses : %d\n", counters[11]);
+  ee_printf("D-Cache Misses : %d\n", counters[12]);    
+  ee_printf("Branches : %d\n", counters[5]);
+  ee_printf("Branches Miss Predictions : %d\n", counters[4]);
+  ee_printf("BTB Misses : %d\n", counters[6]);
+  ee_printf("Jump, JAL, JALR : %d\n", counters[7]);
+  ee_printf("RAS Wrong : %d\n", counters[8]);
+  ee_printf("Returns : %d\n", counters[9]);
+  ee_printf("BP Class Wrong : %d\n", counters[10]);
+  ee_printf("Done printing performance counters : \n");

  exit(ret);
 }
--- a/examples/verilog/fma/Makefile
+++ b/examples/verilog/fma/Makefile
@ -0,0 +1,23 @@
+# Makefile
+
+CC     = gcc
+CFLAGS = -O3
+LFLAGS = -L.
+# Link against the riscv-isa-sim version of SoftFloat rather than 
+# the regular version to get RISC-V NaN behavior
+IFLAGS   = -I$(RISCV)/riscv-isa-sim/softfloat
+# Both libraries live in one assignment: an earlier "LIBS = -lm" followed by a
+# second plain "LIBS =" would silently discard -lm. The math library comes
+# after the SoftFloat archive so it can resolve references from it.
+LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm
+#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
+#LIBS   = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
+SRCS   = $(wildcard *.c)
+
+# One program per C source file in this directory
+PROGS = $(patsubst %.c,%,$(SRCS))
+
+# all and clean are commands, not files: without this a file named "clean" or
+# "all" in the directory would stop the corresponding target from running.
+.PHONY: all clean
+
+all:	$(PROGS)
+
+%: %.c
+	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
+
+clean: 
+	rm -f $(PROGS)
--- a/examples/verilog/fma/baby_torture.tv
+++ b/examples/verilog/fma/baby_torture.tv
--- a/examples/verilog/fma/baby_torture_rz.tv
+++ b/examples/verilog/fma/baby_torture_rz.tv
--- a/examples/verilog/fma/fma.do
+++ b/examples/verilog/fma/fma.do
@ -0,0 +1,23 @@
+# fma.do 
+#
+# run with vsim -do "do fma.do"
+# add -c before -do for batch simulation
+
+onbreak {resume}
+
+# create library
+vlib worklib
+
+vlog -lint -sv -work worklib fma16.v testbench.v
+vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
+vsim -lib worklib testbenchopt
+
+add wave sim:/testbench_fma16/clk
+add wave sim:/testbench_fma16/reset
+add wave sim:/testbench_fma16/x
+add wave sim:/testbench_fma16/y
+add wave sim:/testbench_fma16/z
+add wave sim:/testbench_fma16/result
+add wave sim:/testbench_fma16/rexpected
+
+run -all
--- a/examples/verilog/fma/fma16.v
+++ b/examples/verilog/fma/fma16.v
@ -0,0 +1,268 @@
+// fma16.sv
+// David_Harris@hmc.edu 26 February 2022
+// 16-bit floating-point multiply-accumulate
+
+// Operation: general purpose multiply, add, fma, with optional negation
+//   If mul=1, p = x * y.  Else p = x.
+//   If add=1, result = p + z.  Else result = p.
+//   If negr or negz = 1, negate result or z to handle negations and subtractions
+//   fadd: mul = 0, add = 1, negr = negz = 0
+//   fsub: mul = 0, add = 1, negr = 0, negz = 1
+//   fmul: mul = 1, add = 0, negr = 0, negz = 0
+//   fmadd:  mul = 1, add = 1, negr = 0, negz = 0
+//   fmsub:  mul = 1, add = 1, negr = 0, negz = 1
+//   fnmadd: mul = 1, add = 1, negr = 1, negz = 0
+//   fnmsub: mul = 1, add = 1, negr = 1, negz = 1
+
+// Half-precision format parameters
+`define FFLEN 16                  // total width of a floating-point operand
+`define Nf 10                     // fraction bits
+`define Ne 5                      // exponent bits
+`define BIAS 15                   // exponent bias
+`define EMIN (-(2**(`Ne-1)-1))
+`define EMAX (2**(`Ne-1)-1)
+
+`define NaN 16'h7E00              // full 16-bit NaN pattern produced by this unit
+`define INF 15'h7C00              // infinity without sign; used as {sign, `INF}
+
+// rounding modes *** update
+// Literals are 2 bits wide to match the 2-bit roundmode port of fma16.
+`define RZ  2'b00
+`define RNE 2'b01
+`define RM  2'b10
+`define RP  2'b11
+
+// Top level of the 16-bit fused multiply-add unit.
+// Unpacks x, y, z, forms the product (mult16), adds z (add16) and packs the
+// result with special-case handling (postproc16).
+module fma16(
+  input  logic [`FFLEN-1:0] x, y, z,
+  input  logic        mul, add, negr, negz,
+  input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rm, 11: rp (same encoding as `RZ/`RNE/`RM/`RP)
+  output logic [`FFLEN-1:0] result);
+ 
+  logic [`Nf:0] xm, ym, zm; // U1.Nf
+  logic [`Ne-1:0]  xe, ye, ze; // B_Ne
+  logic        xs, ys, zs;
+  logic        zs1; // sign before optional negation
+  logic [2*`Nf+1:0] pm; // U2.2Nf
+  logic [`Ne:0]  pe; // B_Ne+1
+  logic        ps;  // sign of product
+  logic [22:0] rm;
+  logic [`Ne+1:0]  re;
+  logic        rs;
+  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
+  logic [`Ne+1:0]  re2;
+
+  unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);  // unpack inputs
+  //signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs);             // handle negations
+
+  // zs previously had no driver (unpack16 drives zs1 and signadj16 is commented
+  // out), so every consumer saw X.  Drive it with the sign of z after the
+  // optional negation.
+  assign zs = zs1 ^ negz;
+  // NOTE(review): negr is not connected to anything in this module.
+
+  mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps);                       // p = x * y
+  // add16 applies negz itself (effectivesub = ps ^ zs ^ negz), so it receives
+  // the raw sign zs1 to avoid negating twice.
+  add16 add16(add, pm, zm, pe, ze, ps, zs1, negz, rm, re, re2, rs);             // r = z + p
+  // postproc16 has no negz input, so it receives the already-negated sign zs.
+  postproc16 post(roundmode,  xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result);                 // normalize, round, pack
+endmodule
+
+// Product stage: p = x * y when mul is set, otherwise p = x.
+//   pm: product significand, U2.2Nf
+//   pe: product exponent (biased), one bit wider than the input exponents
+//   ps: product sign
+module mult16(
+  input  logic        mul,
+  input  logic [`Nf:0] xm, ym,
+  input  logic [`Ne-1:0]  xe, ye,
+  input  logic        xs, ys,
+  output logic [2*`Nf+1:0] pm,
+  output logic [`Ne:0]  pe,
+  output logic        ps);
+
+  // only multiply if mul = 1
+  // When bypassing, xm is placed so that it lines up with a product by 1.0;
+  // the zero padding is Nf bits wide rather than a hard-coded 10.
+  assign pm = mul ? xm * ym : {1'b0, xm, {`Nf{1'b0}}}; // multiply mantissas
+  assign pe = mul ? xe + ye - `BIAS : {1'b0, xe};      // add exponents, account for bias
+  // NOTE(review): ps includes ys even when mul = 0; confirm callers force y positive in that case.
+  assign ps = xs ^ ys;                                 // negative if X xor Y are negative
+endmodule
+
+// Addition stage: aligns z with the product p, adds or subtracts the
+// significands, and computes the normalization shift and candidate exponents.
+//   re : exponent selected by the alignment mux
+//   re2: exponent selected by the normalization mux
+//   rs : sign of the sum
+// NOTE(review): rm is never assigned in the active code (its only assignment
+// is inside the commented-out section at the bottom), and the add input is
+// likewise unused.  Rounding is still commented out.
+module add16(
+  input  logic        add,
+  input  logic [2*`Nf+1:0] pm,  // U2.2Nf
+  input  logic [`Nf:0] zm, // U1.Nf
+  input  logic [`Ne:0]  pe, // B_Ne+1
+  input  logic [`Ne-1:0]  ze, // B_Ne
+  input  logic        ps, zs, 
+  input  logic        negz,
+  output logic [22:0] rm,
+  output logic [`Ne+1:0]  re, // B_Ne+2
+  output logic [`Ne+1:0]  re2,
+  output logic        rs);
+
+  logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
+  logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
+  logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.  
+  logic [`Nf-1:0] prezsticky;
+  logic           zsticky;
+  logic          effectivesub;
+  logic           rs0;
+  logic [`Ne:0]     leadingzeros, NormCnt; // *** should paramterize size
+  logic [`Ne:0]   re1;
+
+  // Alignment shift
+  assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
+  assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
+  always_comb // AlignCount mux; see Muller page 254
+    if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7;         re = {1'b0, pe}; end
+    else if (ExpDiff <= 2)       begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
+    else if (ExpDiff <= `Nf+3)   begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
+    else                         begin AlignCnt = 0;                 re = {2'b0, ze}; end
+  // Shift Zm right by AlignCnt.  Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
+  assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
+  assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
+  
+  // Effective subtraction
+  assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
+  assign zalignedaddsub = effectivesub ? ~zaligned : zaligned;  // invert zaligned for subtraction
+
+  // Adder
+  assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
+  assign rs0 = r[`Nf*3+7]; // sign of the initial result
+  assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
+
+  // Sign Logic
+  assign rs = ps ^ rs0; // flip the sign if necessary
+
+  // Leading zero counter
+  lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
+  assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
+
+  // Normalization shift
+  // NOTE(review): re1 is declared unsigned while `EMIN is a negative constant;
+  // verify that the re1 >= `EMIN comparison behaves as intended.
+  always_comb // NormCount mux
+    if (ExpDiff < 3) begin 
+      if (re1 >= `EMIN) begin  NormCnt = `Nf + 3 + leadingzeros;  re2 = {1'b0, re1}; end
+      else              begin  NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN;    end
+    // This branch used to assign re, which the alignment mux above already
+    // drives, and left re2 unassigned.  Every branch of this mux now sets re2.
+    end else            begin  NormCnt = AlignCnt; re2 = {2'b00, ze};                 end
+  assign rnormed = r2 << NormCnt; // *** update sticky
+  /* temporarily comment out to start synth
+
+  // One-bit secondary normalization
+  if (ExpDiff <= 2)          begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
+  else begin // *** handle sticky
+    if (rnormed[***])        begin rnormed2 = rnormed >> 1; re2 = re+1; end
+    else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re;        end
+    else                     begin rnormed2 = rnormed << 1; re2 = re-1; end
+  end
+
+  // round
+  assign l = rnormed2[***]; // least significant bit 
+  assign r = rnormed2[***-1]; // rounding bit
+  assign s = ***; // sticky bit
+  always_comb
+    case (roundmode)
+      RZ: roundup = 0;
+      RP: roundup = ~rs & (r | s); 
+      RM: roundup = rs & (r | s);
+      RNE: roundup = r & (s | l);
+      default: roundup = 0;
+    endcase
+  assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
+*/
+
+  // *** need to handle rounding to MAXNUM vs. INFINITY
+  
+  // add or pass product through
+ /* assign rm = add ? arm : {1'b0, pm};
+  assign re = add ? are : {1'b0, pe};
+  assign rs = add ? ars : ps; */
+endmodule
+
+// Leading zero counter.
+// Counts the zeros above the most significant 1 within the lower 2Nf+5 bits
+// of r2 (bits [2Nf+4:0]), which is the window the add16 instantiation comment
+// describes.  Returns 2Nf+5 when that whole window is zero.  The upper bits of
+// r2 are ignored.
+// The module body was previously empty, leaving leadingzeros undriven.
+module lzc(
+  input  logic [`Nf*3+7:0] r2,
+  output logic [`Ne:0]   leadingzeros
+);
+
+  always_comb begin
+    leadingzeros = (`Ne+1)'(2*`Nf + 5);          // default: no 1 found in the window
+    // scan from LSB to MSB so the highest set bit is the last one to update the count
+    for (int i = 0; i <= 2*`Nf + 4; i++)
+      if (r2[i]) leadingzeros = (`Ne+1)'(2*`Nf + 4 - i);
+  end
+
+endmodule
+
+
+// Post-processing: chooses between special-case results (NaN, infinity, zero
+// operand, overflow) and the computed sum, and packs the 16-bit result.
+// NOTE(review): result is assigned both inside the first always_comb block and
+// by the continuous "assign result" near the bottom of this module; these two
+// drivers conflict and need to be merged into one.
+// NOTE(review): invalid is computed but not connected to any output, and
+// roundmode, zzero and ueb are not read by any active logic in this module.
+module postproc16(
+  input  logic [1:0] roundmode,
+  input  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
+  input  logic [22:0] rm, 
+  input  logic [`Nf:0] zm, // U1.Nf
+  input  logic [6:0]  re, 
+  input  logic [`Ne-1:0]  ze, // B_Ne
+  input  logic        rs, zs, ps,
+  input  logic [`Ne+1:0]  re2,
+  output logic [15:0] result);
+
+  logic [9:0] uf, uff;
+  logic [6:0] ue;
+  logic [6:0] ueb, uebiased;
+  logic       invalid;
+
+    // Special cases
+  // *** not handling signaling NaN
+  // *** also add overflow/underflow/inexact
+  // Priority-ordered: the first matching condition determines result.
+  always_comb begin
+    if (xnan | ynan | znan)                    begin result = `NaN; invalid = 0; end // propagate NANs
+    else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
+    else if (xzero & yinf | xinf & yzero)      begin result = `NaN; invalid = 1; end // zero times infinity
+    else if (xinf | yinf)                      begin result = {ps, `INF}; invalid = 0; end // X or Y
+    else if (zinf)                             begin result = {zs, `INF}; invalid = 0; end // infinite Z
+    else if (xzero | yzero)                    begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end // zero product: pass z through
+    else if (re2 >= `EMAX)                     begin result = {rs, `INF}; invalid = 0; end // exponent too large: signed infinity
+    else                                       begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end // ordinary result
+  end
+  
+  // Select the 10-bit fraction and exponent depending on whether bit 21 of rm is set
+  always_comb 
+    if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
+        ue = re + 7'b1;
+        uf = rm[20:11];
+    end else begin // no normalization shift needed
+        ue = re;
+        uf = rm[19:10];
+    end
+
+  // overflow
+  // NOTE(review): the overflow branch below is commented out, so uebiased and
+  // uff are not assigned when ue >= 46.
+  always_comb begin
+    ueb = ue-7'd15;
+    if (ue >= 7'd46) begin // overflow
+/*      uebiased = 7'd30;
+      uff = 10'h3ff; */
+    end else begin
+      uebiased = ue-7'd15;
+      uff = uf;
+    end
+  end
+  
+  assign result = {rs, uebiased[4:0], uff};
+
+  // add special case handling for zeros, NaN, Infinity
+endmodule
+
+// Sign adjustment: derives the product sign and the sign of the addend after
+// the optional negation requested by negz.  negr is accepted but not used.
+module signadj16(
+  input  logic negr, negz,
+  input  logic xs, ys, zs1,
+  output logic ps, zs);
+
+  always_comb begin
+    ps = xs ^ ys;    // product is negative when exactly one factor is negative
+    zs = negz ^ zs1; // flip the addend sign when negz is set
+  end
+endmodule
+
+// Unpacks the three operands x, y and z into significand, exponent, sign and
+// zero/infinity/NaN flags by instantiating one unpacknum16 per operand.
+module unpack16(
+  input  logic [15:0] x, y, z,
+  output logic [10:0] xm, ym, zm,
+  output logic [4:0]  xe, ye, ze,
+  output logic        xs, ys, zs,
+  output logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
+
+  unpacknum16 upx(.num(x), .m(xm), .e(xe), .s(xs),
+                  .zero(xzero), .inf(xinf), .nan(xnan));
+  unpacknum16 upy(.num(y), .m(ym), .e(ye), .s(ys),
+                  .zero(yzero), .inf(yinf), .nan(ynan));
+  unpacknum16 upz(.num(z), .m(zm), .e(ze), .s(zs),
+                  .zero(zzero), .inf(zinf), .nan(znan));
+endmodule
+
+// Unpacks one half-precision number.
+//   m    : 11-bit significand {hidden bit, fraction}
+//   e    : exponent field, bias left in
+//   s    : sign bit
+//   zero : exponent field and fraction both 0
+//   inf  : exponent field 31, fraction 0
+//   nan  : exponent field 31, fraction nonzero
+module unpacknum16(
+  input logic  [15:0] num,
+  output logic [10:0] m,
+  output logic [4:0]  e,
+  output logic        s, 
+  output logic        zero, inf, nan);
+
+  logic [9:0] f;  // fraction without leading 1
+  logic [4:0] eb; // biased exponent
+
+  assign {s, eb, f} = num; // pull bit fields out of floating-point number
+  // The hidden bit is 1 only for a nonzero exponent field.  Prepending 1
+  // unconditionally made +/-0 unpack with a significand of 1.0, so a zero
+  // addend would still contribute to the sum.
+  assign m = {(eb != 5'd0), f};
+  assign e = eb;   // leave bias in exponent ***
+  assign zero = (e == 0 && f == 0);
+  assign inf = (e == 31 && f == 0);
+  assign nan = (e == 31 && f != 0);
+endmodule
+
+
--- a/examples/verilog/fma/fma16_template.v
+++ b/examples/verilog/fma/fma16_template.v
@ -0,0 +1,24 @@
+// fma16.sv
+// David_Harris@hmc.edu 26 February 2022
+// 16-bit floating-point multiply-accumulate
+
+// Operation: general purpose multiply, add, fma, with optional negation
+//   If mul=1, p = x * y.  Else p = x.
+//   If add=1, result = p + z.  Else result = p.
+//   If negr or negz = 1, negate result or z to handle negations and subtractions
+//   fadd: mul = 0, add = 1, negr = negz = 0
+//   fsub: mul = 0, add = 1, negr = 0, negz = 1
+//   fmul: mul = 1, add = 0, negr = 0, negz = 0
+//   fmadd:  mul = 1, add = 1, negr = 0, negz = 0
+//   fmsub:  mul = 1, add = 1, negr = 0, negz = 1
+//   fnmadd: mul = 1, add = 1, negr = 1, negz = 0
+//   fnmsub: mul = 1, add = 1, negr = 1, negz = 1
+
+// Empty starting point for the fma16 project: only the port list is defined,
+// the body is left to be filled in.
+module fma16(
+  input  logic [15:0] x, y, z,
+  input  logic        mul, add, negr, negz,
+  input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rm, 11: rp (encoding written by fma16_testgen.c)
+  output logic [15:0] result);
+ 
+endmodule
+
+
--- a/examples/verilog/fma/fma16_testgen.c
+++ b/examples/verilog/fma/fma16_testgen.c
@ -0,0 +1,240 @@
+#include <stdio.h>
+#include <stdint.h>
+#include "softfloat.h"
+#include "softfloat_types.h"
+
+// Overlays a SoftFloat float32_t with a native float so the bit pattern can be
+// reinterpreted for printing.
+typedef union sp {
+  float32_t v;
+  float f;
+} sp;
+
+// lists of tests, terminated with 0x8000
+// The exponent and fraction lists are combined by prepTests() as
+// fraction | exponent << 10.
+uint16_t easyExponents[] = {15, 0x8000};
+uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
+uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
+uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
+uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000}; 
+// NOTE(review): the three lists below are not referenced by the code in this
+// file.  infs and nans have no 0x8000 terminator, and the second entry of
+// zeros has the same value as the terminator.
+uint16_t zeros[] = {0x0000, 0x8000};
+uint16_t infs[] = {0x7C00, 0xFC00};
+uint16_t nans[] = {0x7D00, 0x7D01};
+
+// Put SoftFloat into its starting configuration for test generation.
+void softfloatInit(void) {
+    softfloat_exceptionFlags = 0;                                  // no exceptions pending
+    softfloat_detectTininess = softfloat_tininess_beforeRounding;  // tininess detected before rounding
+    softfloat_roundingMode   = softfloat_round_minMag;             // initial rounding mode
+}
+
+// Convert a SoftFloat half-precision value to a native float.
+// The value is widened with f16_to_f32 and its bits are then reinterpreted
+// through the sp union.
+float convFloat(float16_t f16) {
+    sp widened;
+
+    widened.v = f16_to_f32(f16);
+    return widened.f;
+}
+
+// Write one test vector line to fptr.
+//   x, y, z       operands; y is forced to 1.0 when mul is 0 and z to 0 when add is 0
+//   mul, add      which operations the vector exercises
+//   negp, negz    flip the sign of x (negating the product) / of z
+//   roundingMode  2-bit code stored in the op field; the SoftFloat rounding mode itself
+//                 must already have been set by the caller
+//   zeroAllowed, infAllowed, nanAllowed
+//                 when 0, vectors with such a result are written as comments
+// Line format: x_y_z_op_result_flags // readable calculation and flag summary
+void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    float16_t result;
+    int op, flagVals;
+    char calc[80], flags[80];
+    float xf, yf, zf, rf;
+    float16_t smallest;
+
+    if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
+    if (!add) z.v = 0x0000; // force z to 0 to avoid add
+    if (negp) x.v ^= 0x8000; // flip sign of x to negate p
+    if (negz) z.v ^= 0x8000; // flip sign of z to negate z
+    op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
+//    printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
+    softfloat_exceptionFlags = 0; // clear exceptions
+    result = f16_mulAdd(x, y, z);
+
+    // snprintf keeps the text inside the fixed-size buffers
+    snprintf(flags, sizeof(flags), "NV: %d OF: %d UF: %d NX: %d", 
+        (softfloat_exceptionFlags >> 4) % 2,
+        (softfloat_exceptionFlags >> 2) % 2,
+        (softfloat_exceptionFlags >> 1) % 2,
+        (softfloat_exceptionFlags) % 2);
+    // pack these four flags into one nibble, discarding DZ flag
+    flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);
+
+
+    // convert to floats for printing
+    xf = convFloat(x);
+    yf = convFloat(y);
+    zf = convFloat(z);
+    rf = convFloat(result);
+    if (mul) {
+        if (add) snprintf(calc, sizeof(calc), "%f * %f + %f = %f", xf, yf, zf, rf);
+        else     snprintf(calc, sizeof(calc), "%f * %f = %f", xf, yf, rf);
+    } else {
+        snprintf(calc, sizeof(calc), "%f + %f = %f", xf, zf, rf);
+    }
+
+    // omit denorms, which aren't required for this project
+    smallest.v = 0x0400;
+    float16_t resultmag = result;
+    resultmag.v &= 0x7FFF; // take absolute value
+    if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
+    if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
+    // 0x7BFF (largest finite magnitude) is skipped together with infinity
+    if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed)  fprintf(fptr, "// Skip inf: ");
+    if (resultmag.v >  0x7C00 && !nanAllowed)  fprintf(fptr, "// Skip NaN: ");
+    fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
+}
+
+// Write the description line to fptr and build the list of operand patterns.
+//   e, f      exponent and fraction lists, each terminated with 0x8000
+//   testName  not used here
+//   desc      text written as the first line of the file
+//   cases     receives every combination f[j] | e[i] << 10; the caller must
+//             provide room for all of them
+//   numCases  receives the number of entries written to cases
+void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases, 
+               FILE *fptr, int *numCases) {
+    int i, j;
+
+    // desc is data, not a format string: print it through "%s" so that any
+    // '%' it contains is written literally
+    fprintf(fptr, "%s\n", desc);
+    *numCases=0;
+    for (i=0; e[i] != 0x8000; i++)
+        for (j=0; f[j] != 0x8000; j++) {
+            cases[*numCases].v = f[j] | e[i]<<10;
+            *numCases = *numCases + 1;
+        }
+}
+
+// Generate work/<testName>.tv with multiply-only vectors (z = 0) for every
+// pair of operand patterns built from e and f.
+//   sgn  when 1, each pair is emitted a second time with the sign of y flipped
+// The remaining arguments are passed through to genCase().
+void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // for example when the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    z.v = 0x0000;
+    for (i=0; i < numCases; i++) { 
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            y.v = cases[j].v;
+            for (k=0; k<=sgn; k++) {
+                y.v ^= (k<<15); // no change for k = 0, sign flip for k = 1
+                genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+// Generate work/<testName>.tv with add-only vectors for every pair of operand
+// patterns built from e and f (x and z vary; genCase forces y to 1.0).
+//   sgn  when 1, each pair is emitted a second time with the sign of z flipped
+// The remaining arguments are passed through to genCase().
+void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // for example when the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    y.v = 0x0000;
+    for (i=0; i < numCases; i++) {
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            z.v = cases[j].v;
+            for (k=0; k<=sgn; k++) {
+                z.v ^= (k<<15); // no change for k = 0, sign flip for k = 1
+                genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+
+// Generate work/<testName>.tv with multiply-add vectors for every triple of
+// operand patterns built from e and f.
+//   sgn  when 1, each triple is emitted a second time with the sign of z flipped
+// The remaining arguments are passed through to genCase().
+void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, l, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // for example when the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    for (i=0; i < numCases; i++) {
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            y.v = cases[j].v;
+            for (k=0; k<numCases; k++) {
+                z.v = cases[k].v;
+                for (l=0; l<=sgn; l++) {
+                    z.v ^= (l<<15); // no change for l = 0, sign flip for l = 1
+                    genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+                }
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+// Generate work/<testName>.tv with multiply-add vectors over every triple of
+// operand patterns built from e and f, plus +0 and -0.
+//   sgn  when 1, the signs of x, y and z are toggled in nested loops so every
+//        sign combination of each triple is emitted
+// The remaining arguments are passed through to genCase().
+void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, sx, sy, sz, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // for example when the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    cases[numCases].v = 0x0000; // add +0 case
+    cases[numCases+1].v = 0x8000; // add -0 case
+    numCases += 2; 
+    for (i=0; i < numCases; i++) {
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            y.v = cases[j].v;
+            for (k=0; k<numCases; k++) {
+                z.v = cases[k].v;
+                // Each XOR toggles the current sign, so a sign may start an inner
+                // pass already flipped; both polarities are still visited.
+                for (sx=0; sx<=sgn; sx++) {
+                    x.v ^= (sx<<15);
+                    for (sy=0; sy<=sgn; sy++) {
+                        y.v ^= (sy<<15);
+                        for (sz=0; sz<=sgn; sz++) {
+                            z.v ^= (sz<<15);
+                            genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+
+// Generates all test vector files: the multiply, add and FMA sets in the
+// initial rounding mode, then the special-case set once per rounding mode.
+int main()
+{
+    // signature shared by the generator functions
+    typedef void (*generator)(uint16_t *, uint16_t *, int, char *, char *, int, int, int, int);
+    struct basicTest { generator gen; uint16_t *e; uint16_t *f; int sgn; char *name; char *desc; };
+    struct specialTest { int softfloatMode; int code; char *name; char *desc; };
+    unsigned int t;
+
+    // Test cases: multiplication, addition, FMA (rounding code 0; zero, inf and NaN results skipped)
+    struct basicTest basicTests[] = {
+        {genMulTests, easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ"},
+        {genMulTests, medExponents,  medFracts,  0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ"},
+        {genMulTests, medExponents,  medFracts,  1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ"},
+        {genAddTests, easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ"},
+        {genAddTests, medExponents,  medFracts,  0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ"},
+        {genAddTests, medExponents,  medFracts,  1, "fadd_2", "// Add with various exponents and signed fractions, RZ"},
+        {genFMATests, easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ"},
+        {genFMATests, medExponents,  medFracts,  0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ"},
+        {genFMATests, medExponents,  medFracts,  1, "fma_2", "// FMA with various exponents and signed fractions, RZ"}
+    };
+
+    // Full test cases with other rounding modes
+    struct specialTest otherModes[] = {
+        {softfloat_round_near_even, 1, "fma_special_rne", "// FMA with special cases, RNE"},
+        {softfloat_round_min,       2, "fma_special_rm",  "// FMA with special cases, RM"},
+        {softfloat_round_max,       3, "fma_special_rp",  "// FMA with special cases, RP"}
+    };
+
+    softfloatInit(); // configure softfloat modes
+
+    for (t = 0; t < sizeof(basicTests)/sizeof(basicTests[0]); t++)
+        basicTests[t].gen(basicTests[t].e, basicTests[t].f, basicTests[t].sgn,
+                          basicTests[t].name, basicTests[t].desc, 0, 0, 0, 0);
+
+    // Test cases: Zero, Infinity, NaN (still in the mode set by softfloatInit)
+    genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
+
+    for (t = 0; t < sizeof(otherModes)/sizeof(otherModes[0]); t++) {
+        softfloat_roundingMode = otherModes[t].softfloatMode;
+        genSpecialTests(allExponents, medFracts, 1, otherModes[t].name, otherModes[t].desc,
+                        otherModes[t].code, 1, 1, 1);
+    }
+
+    return 0;
+}
--- a/examples/verilog/fma/lint-fma
+++ b/examples/verilog/fma/lint-fma
@ -0,0 +1,8 @@
+#!/bin/bash
+# check for warnings in Verilog code
+# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
+export PATH=$PATH:/usr/local/bin/
+verilator=`which verilator`
+
+basepath=$(dirname $0)/..
+$verilator --lint-only --top-module fma16 fma16.v
--- a/examples/verilog/fma/sim-fma
+++ b/examples/verilog/fma/sim-fma
@ -0,0 +1,2 @@
+vsim -do "do fma.do"
+
--- a/examples/verilog/fma/sim-fma-batch
+++ b/examples/verilog/fma/sim-fma-batch
@ -0,0 +1 @@
+vsim -c -do "do fma.do"
--- a/examples/verilog/fma/synth
+++ b/examples/verilog/fma/synth
@ -0,0 +1 @@
+make -C ../../../synthDC synth DESIGN=fma16
--- a/examples/verilog/fma/testbench.v
+++ b/examples/verilog/fma/testbench.v
@ -0,0 +1,52 @@
+/* verilator lint_off STMTDLY */
+// Self-checking testbench for fma16.
+// Loads test vectors from work/fmul_0.tv, applies one vector per clock cycle
+// and compares the result of the device under test with the expected value.
+// Each 76-bit vector is {x, y, z, ctrl, rexpected, flagsexpected}.
+module testbench_fma16;
+  reg        clk, reset;
+  reg [15:0] x, y, z, rexpected;
+  wire [15:0] result;
+  reg [7:0]  ctrl;
+  reg [3:0]  flagsexpected;
+  reg        mul, add, negp, negz;
+  reg [1:0]  roundmode;
+  reg [31:0] vectornum, errors;
+  reg [75:0] testvectors[10000:0]; // room for 10001 vectors
+
+  // instantiate device under test
+  fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
+
+  // generate clock
+  always 
+    begin
+      clk = 1; #5; clk = 0; #5;
+    end
+
+  // at start of test, load vectors and pulse reset
+  initial
+    begin
+      $readmemh("work/fmul_0.tv", testvectors);
+      vectornum = 0; errors = 0;
+      reset = 1; #22; reset = 0;
+    end
+
+  // apply test vectors on rising edge of clk
+  always @(posedge clk)
+    begin
+      #1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
+      // the low six bits of ctrl hold the rounding mode and the operation flags
+      {roundmode, mul, add, negp, negz} = ctrl[5:0];
+    end
+
+  // check results on falling edge of clk
+  always @(negedge clk)
+    if (~reset) begin // skip during reset
+      if (result !== rexpected) begin  // check result     // *** should also add tests on flags eventually
+        $display("Error: inputs %h * %h + %h", x, y, z);
+        $display("  result = %h (%h expected)", result, rexpected);
+        errors = errors + 1;
+      end
+      vectornum = vectornum + 1;
+      // an all-x entry means no more vectors were loaded: report and stop
+      if (testvectors[vectornum] === 'x) begin 
+        $display("%d tests completed with %d errors", 
+	           vectornum, errors);
+        $stop;
+      end
+    end
+endmodule
--- a/examples/verilog/fma/torture.tv
+++ b/examples/verilog/fma/torture.tv
--- a/examples/verilog/fma/torturegen.pl
+++ b/examples/verilog/fma/torturegen.pl
@ -0,0 +1,130 @@
+#!/usr/bin/perl -w
+# torturegen.pl 
+# David_Harris@hmc.edu 19 April 2022
+# Convert TestFloat cases into format for fma16 project torture test
+# Strip out cases involving denorms
+
+use strict;
+
+# Build the list of input files: f16_<operation>_<rounding mode>.tv
+my @basenames = ("add", "mul", "mulAdd");
+my @roundingmodes = ("rz", "rd", "ru", "rne");
+my @names = ();
+foreach my $name (@basenames) {
+    foreach my $mode (@roundingmodes) {
+        push(@names, "f16_${name}_$mode.tv");
+    }
+}
+
+# All selected vectors are written to a single output file
+open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
+my $datestring = localtime();
+print(TORTURE "// Torture tests generated $datestring by $0\n");
+foreach my $tv (@names) {
+    open(TV, "work/$tv") || die("Can't read $tv");
+    my $type = &getType($tv); # is it mul, add, mulAdd
+    my $rm = &getRm($tv); # rounding mode
+#   if ($rm != 0) { next; } # only do rz
+    print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
+    print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
+    my $linecount = 0;
+    my $babyTorture = 0; # set to 1 to sample far fewer vectors
+    while (<TV>) {
+        my $line = $_;
+        $linecount++;
+        # keep only one line out of every $density
+        my $density = 10;
+        if ($type eq "mulAdd") {$density = 500;}
+        if ($babyTorture) {
+            $density = 100;
+            if ($type eq "mulAdd") {$density = 50000;}
+        }
+        if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
+        chomp($line); # strip off newline
+        my @parts = split(/_/, $line);
+        my ($x, $y, $z, $op, $w, $flags);
+        # Map the input fields onto x, y, z: add lines carry x and z (y is
+        # filled with 0000), mul lines carry x and y (z is filled with 3CFF).
+        $x = $parts[0];
+        if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
+        if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
+        # op field: rounding mode in bits 5:4, mul flag = 8, add flag = 4
+        $op = $rm << 4;
+        if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
+        if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
+        my $opname = sprintf("%02x", $op);
+        # the expected result and the flags follow the operands
+        if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
+        if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
+        $flags = substr($flags, -1); # take last character
+        if (&fpval($w) eq "NaN") { $w = "7e00"; } # every NaN result is replaced by 7e00
+        my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
+        my $skip = "";
+        if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
+            $skip = "Skipped denorm";
+        }
+        my $summary = &summary($x, $y, $z, $w, $type);
+        # skipped vectors are still written, but as comment lines
+        if ($skip ne "") {
+            print TORTURE "// $skip $tv line $linecount $line $summary\n"
+        }
+        else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
+    }
+    close(TV);
+}
+close(TORTURE);
+
+# Return a readable description of a half-precision value given as a hex string:
+# "NaN", "INF", 0, "Denorm" or "1.<fraction in binary> x 2^<exponent>",
+# with a leading "-" when the sign bit is set (never for NaN).
+sub fpval {
+    my $val = shift;
+    $val = hex($val); # convert hex string to number
+    my $frac = $val & 0x3FF;
+    my $exp = ($val >> 10) & 0x1F;
+    my $sign = $val >> 15;
+
+    my $res;
+    if ($exp == 31 && $frac != 0) { return "NaN"; }
+    elsif ($exp == 31) { $res = "INF"; }
+    # test exponent and fraction rather than the whole word so that -0 (8000)
+    # is reported as -0 instead of -Denorm
+    elsif ($exp == 0 && $frac == 0) { $res = 0; }
+    elsif ($exp == 0) { $res = "Denorm"; }
+    # the fraction has 10 bits, so print exactly 10 binary digits
+    else { $res = sprintf("1.%010b x 2^%d", $frac, $exp-15); }
+
+    if ($sign == 1) { $res = "-$res"; }
+    return $res;
+}
+
+# Build a readable "x * y + z = w" style summary of one test vector.
+# Arguments: x, y, z, w as hex strings, then the test type (add, mul or other).
+sub summary {
+    my ($x, $y, $z, $w, $type) = @_;
+
+    # describe all four values, in the original order
+    my ($xv, $yv, $zv, $wv) = map { &fpval($_) } ($x, $y, $z, $w);
+
+    return "$xv + $zv = $wv" if $type eq "add";
+    return "$xv * $yv = $wv" if $type eq "mul";
+    return "$xv * $yv + $zv = $wv";
+}
+
+# Classify a test vector file name as "mulAdd", "mul" or "add".
+# mulAdd is tested first because such names also contain "mul".
+sub getType {
+    my ($tv) = @_;
+
+    return "mulAdd" if $tv =~ /mulAdd/;
+    return "mul"    if $tv =~ /mul/;
+    return "add";
+}
+
+# Map the rounding mode named in a file name to its numeric code:
+# rz -> 0, rne -> 1, rd -> 2, ru -> 3; "bad" when none of them appears.
+sub getRm {
+    my ($tv) = @_;
+
+    # checked in this order; the first pattern found in the name wins
+    my @codes = ([qr/rz/, 0], [qr/rne/, 1], [qr/rd/, 2], [qr/ru/, 3]);
+    foreach my $entry (@codes) {
+        my ($pattern, $code) = @$entry;
+        return $code if $tv =~ $pattern;
+    }
+    return "bad";
+}
+
+# Return 1 when the half-precision value given as a hex string is a denorm
+# (exponent field 0 with a nonzero fraction), otherwise 0.
+sub isdenorm {
+    my $fp = shift;
+    my $val = hex($fp);
+    my $expv = ($val >> 10) & 0x1F;
+    my $frac = $val & 0x3FF;
+    my $denorm = 0;
+    # Test the fraction, not the whole word: -0 (8000) has the sign bit set
+    # but is not a denorm, and was previously flagged as one.
+    if ($expv == 0 && $frac != 0) { $denorm = 1;}
+    return $denorm;
+}
--- a/examples/verilog/fma/wave.do
+++ b/examples/verilog/fma/wave.do
@ -0,0 +1,62 @@
+onerror {resume}
+quietly WaveActivateNextPane {} 0
+add wave -noupdate /testbench_fma16/clk
+add wave -noupdate /testbench_fma16/reset
+add wave -noupdate /testbench_fma16/x
+add wave -noupdate /testbench_fma16/y
+add wave -noupdate /testbench_fma16/z
+add wave -noupdate /testbench_fma16/result
+add wave -noupdate /testbench_fma16/rexpected
+add wave -noupdate /testbench_fma16/dut/x
+add wave -noupdate /testbench_fma16/dut/y
+add wave -noupdate /testbench_fma16/dut/z
+add wave -noupdate /testbench_fma16/dut/mul
+add wave -noupdate /testbench_fma16/dut/add
+add wave -noupdate /testbench_fma16/dut/negr
+add wave -noupdate /testbench_fma16/dut/negz
+add wave -noupdate /testbench_fma16/dut/roundmode
+add wave -noupdate /testbench_fma16/dut/result
+add wave -noupdate /testbench_fma16/dut/XManE
+add wave -noupdate /testbench_fma16/dut/YManE
+add wave -noupdate /testbench_fma16/dut/ZManE
+add wave -noupdate /testbench_fma16/dut/XExpE
+add wave -noupdate /testbench_fma16/dut/YExpE
+add wave -noupdate /testbench_fma16/dut/ZExpE
+add wave -noupdate /testbench_fma16/dut/PExpE
+add wave -noupdate /testbench_fma16/dut/Ne
+add wave -noupdate /testbench_fma16/dut/upOneExt
+add wave -noupdate /testbench_fma16/dut/XSgnE
+add wave -noupdate /testbench_fma16/dut/YSgnE
+add wave -noupdate /testbench_fma16/dut/ZSgnE
+add wave -noupdate /testbench_fma16/dut/PSgnE
+add wave -noupdate /testbench_fma16/dut/ProdManE
+add wave -noupdate /testbench_fma16/dut/NfracS
+add wave -noupdate /testbench_fma16/dut/ProdManAl
+add wave -noupdate /testbench_fma16/dut/ZManExt
+add wave -noupdate /testbench_fma16/dut/ZManAl
+add wave -noupdate /testbench_fma16/dut/Nfrac
+add wave -noupdate /testbench_fma16/dut/res
+add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
+add wave -noupdate /testbench_fma16/dut/NSamt
+add wave -noupdate /testbench_fma16/dut/ZExpGreater
+add wave -noupdate /testbench_fma16/dut/ACLess
+add wave -noupdate /testbench_fma16/dut/upOne
+add wave -noupdate /testbench_fma16/dut/KillProd
+TreeUpdate [SetDefaultTree]
+WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
+quietly wave cursor active 2
+configure wave -namecolwidth 237
+configure wave -valuecolwidth 64
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+configure wave -gridoffset 0
+configure wave -gridperiod 1
+configure wave -griddelta 40
+configure wave -timeline 0
+configure wave -timelineunits ns
+update
+WaveRestoreZoom {4083 ns} {4235 ns}
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -55,20 +55,22 @@
 `define Q_NE 32'd15
 `define Q_NF 32'd112
 `define Q_BIAS 32'd16383
+`define Q_FMT 2'd3
 `define D_LEN 32'd64
 `define D_NE 32'd11
 `define D_NF 32'd52
 `define D_BIAS 32'd1023
-`define D_FMT 32'd1
+`define D_FMT 2'd1
 `define S_LEN 32'd32
 `define S_NE 32'd8
 `define S_NF 32'd23
 `define S_BIAS 32'd127
-`define S_FMT 32'd1
+`define S_FMT 2'd0
 `define H_LEN 32'd16
 `define H_NE 32'd5
 `define H_NF 32'd10
 `define H_BIAS 32'd15
+`define H_FMT 2'd2

 // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
 `define FLEN (`Q_SUPPORTED ? `Q_LEN  : `D_SUPPORTED ? `D_LEN  : `F_SUPPORTED ? `S_LEN  : `H_LEN)
@ -91,6 +93,12 @@
 `define FMT2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0    : 2'd2)
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)

+// largest length in IEU/FPU
+`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
+`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
+`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
+`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
+
 // Disable spurious Verilator warnings

 /* verilator lint_off STMTDLY */
--- a/pipelined/regression/sim-coremark-batch
+++ b/pipelined/regression/sim-coremark-batch
@ -1 +0,0 @@
-vsim -c -do wally-coremark.do
--- a/pipelined/regression/sim-testfloat
+++ b/pipelined/regression/sim-testfloat
@ -9,4 +9,4 @@
 # sqrt   - test square ro
 # all    - test everything

-vsim -do "do testfloat.do rv64fpquad cmp"
+vsim -do "do testfloat.do rv64fp mul"
--- a/pipelined/regression/wally-coremark.do
+++ b/pipelined/regression/wally-coremark.do
@ -1,45 +0,0 @@
-# wally-coremark.do 
-#
-# Modification by Oklahoma State University & Harvey Mudd College
-# Use with Testbench 
-# James Stine, 2008; David Harris 2021
-# Go Cowboys!!!!!!
-#
-# Takes 1:10 to run RV64IC tests using gui
-
-# Use this wally-coremark.do file to run this example.
-# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
-#     do wally-coremark.do
-# or, to run from a shell, type the following at the shell prompt:
-#     vsim -do wally-coremark.do -c
-# (omit the "-c" to see the GUI while running from the shell)
-
-onbreak {resume}
-
-# create library
-if [file exists work] {
-    vdel -all
-}
-vlib work
-
-# compile source files
-# suppress spurious warnngs about 
-# "Extra checking for conflicts with always_comb done at vopt time"
-# because vsim will run vopt
-
-# default to config/coremark, but allow this to be overridden at the command line.  For example:
-#vlog +incdir+../config/coremark_bare +incdir+../config/shared ../testbench/testbench-coremark_bare.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
-vlog +incdir+../config/rv64gc +incdir+../config/shared ../testbench/testbench-coremark_bare.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
-
-# start and run simulation
-# remove +acc flag for faster sim during regressions if there is no need to access internal signals
-vopt +acc work.testbench -o workopt 
-vsim workopt
-
-mem load -startaddress 268435456 -endaddress 268566527 -filltype value -fillradix hex -filldata 0 /testbench/dut/uncore/ram/ram/RAM
-
-#add log -recursive /*
-do wave.do
-run -all
-#run 21400
-#quit
--- a/pipelined/regression/wave-coremark.do
+++ b/pipelined/regression/wave-coremark.do
@ -1,502 +0,0 @@
-onerror {resume}
-quietly WaveActivateNextPane {} 0
-add wave -noupdate /testbench/clk
-add wave -noupdate /testbench/reset
-add wave -noupdate /testbench/test
-add wave -noupdate /testbench/memfilename
-add wave -noupdate /testbench/dut/core/SATP_REGW
-add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
-add wave -noupdate -group {Execution Stage} /testbench/InstrEName
-add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
-add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/priv/trap/InstrValidM
-add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
-add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
-add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
-add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/MemAdrM
-add wave -noupdate /testbench/dut/core/ieu/dp/ResultM
-add wave -noupdate /testbench/dut/core/ieu/dp/ResultW
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrMisalignedFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrAccessFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/IllegalInstrFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/BreakpointFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadMisalignedFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StoreAmoMisalignedFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadAccessFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StoreAmoAccessFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/EcallFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrPageFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadPageFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StorePageFaultM
-add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InterruptM
-add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/PendingIntsM
-add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/CommittedM
-add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/InstrValidM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWritePendingDEM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/TrapM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/ICacheStallF
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM
-add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/MulDivStallD
-add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF
-add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
-add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
-add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
-add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
-add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallF
-add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallD
-add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallE
-add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallM
-add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallW
-add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]}
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPInstrClassE[0]}
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredDirWrongE
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} -divider {class check}
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightNonCFI
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongCFI
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongNonCFI
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPRight
-add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPWrong
-add wave -noupdate -group Bpred -radix hexadecimal -childformat {{{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} -radix binary}} -subitemconfig {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} {-height 16 -radix binary}} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel
-add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNext
-add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRUpdateEN
-add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr
-add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr0
-add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr1
-add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateEN
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRLookup
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/RA1
-add wave -noupdate -group Bpred -expand -group prediction -radix binary /testbench/dut/core/ifu/bpred/bpred/BPPredF
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BTBValidF
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BPInstrClassF
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BTBPredPCF
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/LookUpPCIndex
-add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/TargetPC
-add wave -noupdate -group Bpred -expand -group prediction -expand -group ex -radix binary /testbench/dut/core/ifu/bpred/bpred/BPPredE
-add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
-add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/BPPredDirWrongE
-add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdatePCIndex
-add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateTarget
-add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateEN
-add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdatePC
-add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateTarget
-add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr
-add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCE
-add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/WA1
-add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/TargetWrongE
-add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/FallThroughWrongE
-add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/PredictionPCWrongE
-add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/InstrClassE
-add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/PredictionInstrClassWrongE
-add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredClassNonCFIWrongE
-add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE
-add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE
-add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
-add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/bus/icache/FinalInstrRawF
-add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
-add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrE
-add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrM
-add wave -noupdate -group {instruction pipeline} /testbench/InstrW
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/BPPredPCF
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNext0F
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNext1F
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/SelBPPredF
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/BPPredWrongE
-add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PrivilegedChangePCM
-add wave -noupdate -group {Decode Stage} /testbench/dut/core/ifu/InstrD
-add wave -noupdate -group {Decode Stage} /testbench/InstrDName
-add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD
-add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD
-add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
-add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
-add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf
-add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1
-add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2
-add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a3
-add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd1
-add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd2
-add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/we3
-add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/wd3
-add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ReadDataW
-add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/CSRReadValW
-add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultSrcW
-add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/A
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/B
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUControl
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/result
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/FlagsE
-add wave -noupdate -group alu -divider internals
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/overflow
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/carry
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/zero
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/neg
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/lt
-add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ltu
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1D
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2D
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1E
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2E
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdE
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdM
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdW
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/MemReadE
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RegWriteM
-add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RegWriteW
-add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/ForwardAE
-add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/ForwardBE
-add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/LoadStallD
-add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/WriteDataE
-add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/ALUResultE
-add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/SrcAE
-add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/SrcBE
-add wave -noupdate -group PCS /testbench/dut/core/ifu/PCNextF
-add wave -noupdate -group PCS /testbench/dut/core/PCF
-add wave -noupdate -group PCS /testbench/dut/core/ifu/PCD
-add wave -noupdate -group PCS /testbench/dut/core/PCE
-add wave -noupdate -group PCS /testbench/dut/core/PCM
-add wave -noupdate -group PCS /testbench/PCW
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/Funct3E
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/MulDivE
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/W64E
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/StallM
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/StallW
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/FlushM
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/FlushW
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/MulDivResultW
-add wave -noupdate -group muldiv /testbench/dut/core/mdu/DivBusyE
-add wave -noupdate -group icache -color Gold /testbench/dut/core/ifu/bus/icache/controller/CurrState
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/BasePAdrF
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/HitWay
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/VictimWay
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/WriteEnable}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/SetValid}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/CacheTagMem/StoredData}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/ValidBits}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
-add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/controller/NextState
-add wave -noupdate -group icache /testbench/dut/core/ifu/ITLBMissF
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ITLBWriteF
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ReadLineF
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ReadLineF
-add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/BasePAdrF
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/hit
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spill
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/ICacheStallF
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spillSave
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spillSave
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/CntReset
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/PreCntEn
-add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/CntEn
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/InstrPAdrF
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/InstrInF
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/FetchCountFlag
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/FetchCount
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/InstrReadF
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/InstrAckF
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/ICacheMemWriteEnable
-add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/ICacheBusWriteData
-add wave -noupdate -group AHB -color Gold /testbench/dut/core/ebu/BusState
-add wave -noupdate -group AHB /testbench/dut/core/ebu/NextBusState
-add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/AtomicMaskedM
-add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/InstrReadF
-add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/MemSizeM
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HCLK
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HRESETn
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HRDATA
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HREADY
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HRESP
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDR
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HWDATA
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITE
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZE
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HBURST
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HPROT
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HTRANS
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED
-add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED
-add wave -noupdate -group lsu -expand -group {LSU ARB} /testbench/dut/core/lsu/arbiter/SelPTW
-add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu.bus.dcache/dcachefsm/CurrState
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/WalkerPageFaultM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/WriteDataM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMBlockWriteEnableM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWordWriteEnableM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWayWriteEnable
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWordEnable
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMBlockWayWriteEnableM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SelAdrM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/ReadDataBlockM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/DCacheBusWriteData
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/SetValid}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/SetDirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/CacheTagMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/DirtyBits}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/ValidBits}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/DirtyBits}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/SetDirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/WriteWordEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/CacheTagMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/SetValid}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/SetDirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/CacheTagMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/DirtyBits}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/ValidBits}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/SetValid}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/SetDirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/CacheTagMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/DirtyBits}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/ValidBits}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/SetValid
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/ClearValid
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/SetDirty
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/ClearDirty
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/HitWay}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/Valid}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/Dirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/ReadTag}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/HitWay}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/Valid}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/Dirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/ReadTag}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/HitWay}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/Valid}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/Dirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/ReadTag}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/HitWay}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/Valid}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/Dirty}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/ReadTag}
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/HitWay
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataBlockWayMaskedM
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataWordM
-add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataWordMuxM
-add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimTag
-add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimWay
-add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimDirtyWay
-add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimDirty
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemRWM
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemAdrE
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemPAdrM
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/Funct3M
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/Funct7M
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/AtomicM
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/FlushDCacheM
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/CacheableM
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/WriteDataM
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/ReadDataM
-add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/DCacheStallM
-add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/FlushAdrFlag
-add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu.bus.dcache/HitWay
-add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu.bus.dcache/CacheHit
-add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu.bus.dcache/FetchCount
-add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/FetchCountFlag
-add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBPAdr
-add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBRead
-add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBWrite
-add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBAck
-add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/HRDATA
-add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/HWDATA
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/EffectivePrivilegeMode
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/Translate
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/DisableTranslation
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/TLBMiss
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/TLBHit
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/PhysicalAddress
-add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/TLBPageFault
-add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/LoadAccessFaultM
-add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/StoreAmoAccessFaultM
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/TLBPAdr
-add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/genblk1/tlb/PTE
-add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/genblk1/tlb/TLBWrite
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/PhysicalAddress
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/SelRegions
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/Cacheable
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/Idempotent
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/AtomicAllowed
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/PMAAccessFault
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMAInstrAccessFaultF
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMALoadAccessFaultM
-add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMAStoreAmoAccessFaultM
-add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPInstrAccessFaultF
-add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPLoadAccessFaultM
-add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPStoreAmoAccessFaultM
-add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/genblk1/WalkerState
-add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/PCF
-add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/genblk1/TranslationVAdr
-add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/TranslationPAdr
-add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/HPTWReadPTE
-add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/PTE
-add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/ITLBMissF
-add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/DTLBMissM
-add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/ITLBWriteF
-add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/DTLBWriteM
-add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerInstrPageFaultF
-add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerLoadPageFaultM
-add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerStorePageFaultM
-add wave -noupdate -group csr /testbench/dut/core/priv/csr/MIP_REGW
-add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/TLBWrite
-add wave -noupdate -group itlb /testbench/dut/core/ifu/ITLBMissF
-add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/PhysicalAddress
-add wave -noupdate /testbench/dut/core/lsu.bus.dcache/VAdr
-add wave -noupdate /testbench/dut/core/lsu.bus.dcache/MemPAdrM
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC
-add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/ExtIntM
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn
-add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HCLK
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESETn
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HSELUART
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HADDR
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWRITE
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWDATA
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADUART
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESPUART
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADYUART
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/SIN
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DSRb
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DCDb
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/CTSb
-add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/RIb
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/SOUT
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RTSb
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/DTRb
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/OUT1b
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/OUT2b
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/INTR
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/TXRDYb
-add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RXRDYb
-add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HCLK
-add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART
-add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR
-add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE
-add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA
-add wave -noupdate -radix unsigned /testbench/dut/core/priv/csr/genblk1/counters/genblk1/CYCLE_REGW
-add wave -noupdate -radix unsigned /testbench/dut/core/priv/csr/genblk1/counters/genblk1/INSTRET_REGW
-add wave -noupdate -label LoadStall -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[3]}
-add wave -noupdate -label {Branch Instr} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[5]}
-add wave -noupdate -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[4]}
-add wave -noupdate -label {Jump, Jal, Jalr} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[7]}
-add wave -noupdate -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[8]}
-add wave -noupdate -label {BTB Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[6]}
-add wave -noupdate -label {BP Class Non CFI Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[10]}
-add wave -noupdate -label DCacheAccess -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[11]}
-add wave -noupdate -label DCacheMiss -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[12]}
-add wave -noupdate -label Return -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[9]}
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/MCOUNTINHIBIT_REGW
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/InstrValidM
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/InstrValidNotFlushedM
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/BPPredDirWrongM
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/genblk1/genblk1/LoadStallM
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/genblk1/NextHPMCOUNTERM
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/DCacheMiss
-add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/DCacheAccess
-TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 6} {17923831 ns} 0}
-quietly wave cursor active 1
-configure wave -namecolwidth 250
-configure wave -valuecolwidth 297
-configure wave -justifyvalue left
-configure wave -signalnamewidth 1
-configure wave -snapdistance 10
-configure wave -datasetprefix 0
-configure wave -rowmargin 4
-configure wave -childrowmargin 2
-configure wave -gridoffset 0
-configure wave -gridperiod 1
-configure wave -griddelta 40
-configure wave -timeline 0
-configure wave -timelineunits ns
-update
-WaveRestoreZoom {0 ns} {18715695 ns}
--- a/pipelined/regression/wave-dos/ahb-muldiv.do
+++ b/pipelined/regression/wave-dos/ahb-muldiv.do
@ -67,6 +67,7 @@ add wave -hex /testbench/dut/core/ebu/HTRANS
 add wave -hex /testbench/dut/core/ebu/HRDATA
 add wave -hex /testbench/dut/core/ebu/HWRITE
 add wave -hex /testbench/dut/core/ebu/HWDATA
+add wave -hex /testbench/dut/core/ebu/HBURST
 add wave -hex /testbench/dut/core/ebu/CaptureDataM
 add wave -divider

--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@ -1,102 +1,9 @@

 add wave -noupdate /testbenchfp/clk
 add wave -noupdate -radix decimal /testbenchfp/VectorNum
-add wave -group Other -noupdate /testbenchfp/FrmNum
-add wave -group Other -noupdate /testbenchfp/X
-add wave -group Other -noupdate /testbenchfp/Y
-add wave -group Other -noupdate /testbenchfp/Z
-add wave -group Other -noupdate /testbenchfp/Res
-add wave -group Other -noupdate /testbenchfp/Ans
-
-add wave -group Rne -noupdate /testbenchfp/FmaRneX
-add wave -group Rne -noupdate /testbenchfp/FmaRneY
-add wave -group Rne -noupdate /testbenchfp/FmaRneZ
-add wave -group Rne -noupdate /testbenchfp/FmaRneRes
-add wave -group Rne -noupdate /testbenchfp/FmaRneAns
-add wave -group Rz -noupdate /testbenchfp/FmaRzX
-add wave -group Rz -noupdate /testbenchfp/FmaRzY
-add wave -group Rz -noupdate /testbenchfp/FmaRzZ
-add wave -group Rz -noupdate /testbenchfp/FmaRzRes
-add wave -group Rz -noupdate /testbenchfp/FmaRzAns
-add wave -group Ru -noupdate /testbenchfp/FmaRuX
-add wave -group Ru -noupdate /testbenchfp/FmaRuY
-add wave -group Ru -noupdate /testbenchfp/FmaRuZ
-add wave -group Ru -noupdate /testbenchfp/FmaRuRes
-add wave -group Ru -noupdate /testbenchfp/FmaRuAns
-add wave -group Rd -noupdate /testbenchfp/FmaRdX
-add wave -group Rd -noupdate /testbenchfp/FmaRdY
-add wave -group Rd -noupdate /testbenchfp/FmaRdZ
-add wave -group Rd -noupdate /testbenchfp/FmaRdRes
-add wave -group Rd -noupdate /testbenchfp/FmaRdAns
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmX
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmY
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmRes
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns
-add wave -group AllSignals -noupdate /*
-add wave -group AllSignals -noupdate /testbenchfp/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/*
+add wave -noupdate /testbenchfp/FrmNum
+add wave -noupdate /testbenchfp/X
+add wave -noupdate /testbenchfp/Y
+add wave -noupdate /testbenchfp/Z
+add wave -noupdate /testbenchfp/Res
+add wave -noupdate /testbenchfp/Ans
--- a/pipelined/regression/wave.do
+++ b/pipelined/regression/wave.do
@ -473,6 +473,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead
 add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr
 add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck
 add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA
+add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete
 add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF
 add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState
 add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0
--- a/pipelined/src/ebu/ahblite.sv
+++ b/pipelined/src/ebu/ahblite.sv
@ -45,6 +45,10 @@ module ahblite (
  input logic 				 IFUBusRead,
  output logic [`XLEN-1:0] 	 IFUBusHRDATA,
  output logic 				 IFUBusAck,
+  output logic         IFUBusInit,
+  input logic [2:0]    IFUBurstType,
+  input logic [1:0]    IFUTransType,
+  input logic          IFUTransComplete,
  // Signals from Data Cache
  input logic [`PA_BITS-1:0] LSUBusAdr,
  input logic 				 LSUBusRead, 
@ -52,7 +56,11 @@ module ahblite (
  input logic [`XLEN-1:0] 	 LSUBusHWDATA,
  output logic [`XLEN-1:0] 	 LSUBusHRDATA,
  input logic [2:0] 		 LSUBusSize,
+  input logic [2:0]      LSUBurstType,
+  input logic [1:0]    LSUTransType,
+  input logic          LSUTransComplete,
  output logic 				 LSUBusAck,
+  output logic         LSUBusInit,
  // AHB-Lite external signals
  (* mark_debug = "true" *) input logic [`AHBW-1:0] HRDATA,
  (* mark_debug = "true" *) input logic HREADY, HRESP,
@ -87,6 +95,9 @@ module ahblite (
  // Data accesses have priority over instructions.  However, if a data access comes
  // while an instruction read is occuring, the instruction read finishes before
  // the data access can take place.
+  //  *** This is no longer true when adding burst mode. We need to finish the current
+  //  read before doing another read. Need to work this out, but preliminarily we can
+  //  store the current read type in a flop and use that to figure out what burst type to use.

  flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState);

@ -100,19 +111,21 @@ module ahblite (
  // interface that might be used in place of the ahblite.
  always_comb 
    case (BusState) 
-      IDLE: if (LSUBusRead)      NextBusState = MEMREAD;  // Memory has priority over instructions
-            else if (LSUBusWrite)NextBusState = MEMWRITE;
-            else if (IFUBusRead)   NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;
-      MEMREAD: if (~HREADY)        NextBusState = MEMREAD;
-            else if (IFUBusRead)   NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;
-      MEMWRITE: if (~HREADY)       NextBusState = MEMWRITE;
-            else if (IFUBusRead)   NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;
-      INSTRREAD: if (~HREADY)      NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;  // if (IFUBusRead still high) *** need to wait?
-      default:                     NextBusState = IDLE;
+      IDLE: if (LSUBusRead)                               NextBusState = MEMREAD;  // Memory has priority over instructions
+            else if (LSUBusWrite)                         NextBusState = MEMWRITE;
+            else if (IFUBusRead)                          NextBusState = INSTRREAD;
+            else                                          NextBusState = IDLE;
+      MEMREAD: if (LSUTransComplete & IFUBusRead)         NextBusState = INSTRREAD;
+               else if (LSUTransComplete)                 NextBusState = IDLE;
+               else                                       NextBusState = MEMREAD;
+      MEMWRITE: if (LSUTransComplete & IFUBusRead)        NextBusState = INSTRREAD;
+                else if (LSUTransComplete)                NextBusState = IDLE;
+                else                                      NextBusState = MEMWRITE;
+      INSTRREAD: if (IFUTransComplete & LSUBusRead)       NextBusState = MEMREAD;
+                 else if (IFUTransComplete & LSUBusWrite) NextBusState = MEMWRITE;
+                 else if (IFUTransComplete)               NextBusState = IDLE;
+                 else                                     NextBusState = INSTRREAD;
+      default:                                            NextBusState = IDLE;
    endcase


@ -122,7 +135,7 @@ module ahblite (
  assign #1 HADDR = AccessAddress;
  assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
  assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize;
-  assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH
+  assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.

  /* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE.
        000: Single (SINGLE)
@ -133,15 +146,16 @@ module ahblite (
        101: 8-beat incrementing burst (INCR8)
        110: 16-beat wrapping burst (WRAP16) [wraps if X in 0X000000]
        111: 16-beat incrementing burst (INCR16)
-  */
+        *** Remove if not necessary
+  */ 


  assign HPROT = 4'b0011; // not used; see Section 3.7
-  assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
+  assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
  assign HMASTLOCK = 0; // no locking supported
-  assign HWRITE = NextBusState == MEMWRITE;
+  assign HWRITE = (NextBusState == MEMWRITE);
  // delay write data by one cycle for
-  flop #(`XLEN) wdreg(HCLK, LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
+  flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
  // delay signals for subword writes
  flop #(3)   adrreg(HCLK, HADDR[2:0], HADDRD);
  flop #(4)   sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED);
@ -153,7 +167,9 @@ module ahblite (
 
  assign IFUBusHRDATA = HRDATA;
  assign LSUBusHRDATA = HRDATA;
-  assign IFUBusAck = (BusState == INSTRREAD) & (NextBusState != INSTRREAD);
-  assign LSUBusAck = (BusState == MEMREAD) & (NextBusState != MEMREAD) | (BusState == MEMWRITE) & (NextBusState != MEMWRITE);
+  assign IFUBusInit = (BusState != INSTRREAD) & (NextBusState == INSTRREAD);
+  assign LSUBusInit = (((BusState != MEMREAD) & (NextBusState == MEMREAD)) | (BusState != MEMWRITE) & (NextBusState == MEMWRITE));
+  assign IFUBusAck = HREADY & (BusState == INSTRREAD);
+  assign LSUBusAck = HREADY & ((BusState == MEMREAD) | (BusState == MEMWRITE));

 endmodule
--- a/pipelined/src/fpu/cvtshiftcalc.sv
+++ b/pipelined/src/fpu/cvtshiftcalc.sv
@ -0,0 +1,69 @@
+`include "wally-config.vh"
+
+module cvtshiftcalc(                  // selects the convert shifter's input and detects underflow of ??? -> fp results
+    input logic                    XZeroM,         // is the input zero (a zero input never underflows)
+    input logic                    ToInt,          // selects the fp -> int layout of the shifter input
+    input logic                    IntToFp,        // integer to fp conversion (can't underflow)
+    input logic  [`NE:0]           CvtCalcExpM,    // the calculated exponent
+    input logic  [`NF:0]           XManM,          // input mantissas
+    input logic     [`FMTBITS-1:0]  OutFmt,       // output format
+    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic CvtResDenormUfM,                   // does the result underflow or is denormalized
+    output logic CvtResUf,                         // does the result underflow (??? -> fp only)
+    output logic [`LGLEN+`NF:0]    CvtShiftIn    // number to be shifted
+);
+    logic [$clog2(`NF):0]    ResNegNF;   // the result's fraction length negated (-NF)
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // shifter
+    ///////////////////////////////////////////////////////////////////////////
+
+    // select the input to the shifter
+    //      fp  -> int:
+    //          |  `XLEN  zeros |     Mantissa      | 0's if necessary |
+    //          Other problems:
+    //              - if shifting to the right (neg CalcExp) then don't keep a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
+    //              - we do however want to keep the one in the sticky bit so set one of the bits in the sticky bit area to 1
+    //                  - ex: for the case 0010000.... (double)
+    //      ??? -> fp:
+    //          - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | 
+    //          - otherwise:
+    //              |     LzcInM      | 0's if necessary | 
+    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
+                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : 
+                                   {CvtLzcInM, {`NF+1{1'b0}}};
+    
+    
+    // choose the negative of the fraction size of the output format (one branch per supported `FPSIZES)
+    if (`FPSIZES == 1) begin
+        assign ResNegNF = -($clog2(`NF)+1)'(`NF); 
+
+    end else if (`FPSIZES == 2) begin
+        assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT:  ResNegNF = -($clog2(`NF)+1)'(`NF);
+                `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
+                `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
+                default: ResNegNF = {($clog2(`NF)+1){1'bx}}; // drive every bit to x; a bare 1'bx would be zero-extended
+            endcase
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb
+            case (OutFmt)
+                2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF); // quad
+                2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF); // double
+                2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF); // single
+                2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF); // half
+            endcase
+    end
+    // determine if the result underflows ??? -> fp
+    //      - if the first 1 is shifted out of the result then the result underflows
+    //      - can't underflow an integer to fp conversion
+    assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
+   
+endmodule
--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@ -2,13 +2,12 @@
 `include "wally-config.vh"

 // FOpCtrlE values
-//    111   min
+//    110   min
 //    101   max
 //    010   equal
 //    001   less than
 //    011   less than or equal

-
 module fcmp (   
   input logic  [`FMTBITS-1:0]   FmtE,           // precision 1 = double 0 = single
   input logic  [2:0]            FOpCtrlE,       // see above table
@ -20,12 +19,13 @@ module fcmp (
   input logic                   XSNaNE, YSNaNE, // is signaling NaN
   input logic  [`FLEN-1:0]      FSrcXE, FSrcYE, // original, non-converted to double, inputs
   output logic                  CmpNVE,         // invalid flag
-   output logic [`FLEN-1:0]      CmpResE         // compare resilt
+   output logic [`FLEN-1:0]      CmpFpResE,         // compare resilt
+   output logic [`XLEN-1:0]      CmpIntResE         // compare resilt
   );

   logic LTabs, LT, EQ; // is X < or > or = Y
   logic [`FLEN-1:0] NaNRes;
-   logic BothZeroE, EitherNaNE, EitherSNaNE;
+   logic BothZero, EitherNaN, EitherSNaN;
   
   assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
   assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
@ -36,9 +36,9 @@ module fcmp (
 //   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
   assign EQ = (FSrcXE == FSrcYE);

-   assign BothZeroE = XZeroE&YZeroE;
-   assign EitherNaNE = XNaNE|YNaNE;
-   assign EitherSNaNE = XSNaNE|YSNaNE;
+   assign BothZero = XZeroE&YZeroE;
+   assign EitherNaN = XNaNE|YNaNE;
+   assign EitherSNaN = XSNaNE|YSNaNE;


   // flags
@ -47,12 +47,12 @@ module fcmp (
   //    EQ - quiet - sets invalid if signaling NaN input
   always_comb begin
      case (FOpCtrlE[2:0])
-         3'b111: CmpNVE = EitherSNaNE;//min 
-         3'b101: CmpNVE = EitherSNaNE;//max
-         3'b010: CmpNVE = EitherSNaNE;//equal
-         3'b001: CmpNVE = EitherNaNE;//less than
-         3'b011: CmpNVE = EitherNaNE;//less than or equal
-         default: CmpNVE = 1'b0;
+         3'b110: CmpNVE = EitherSNaN;//min 
+         3'b101: CmpNVE = EitherSNaN;//max
+         3'b010: CmpNVE = EitherSNaN;//equal
+         3'b001: CmpNVE = EitherNaN;//less than
+         3'b011: CmpNVE = EitherNaN;//less than or equal
+         default: CmpNVE = 1'bx;
      endcase
   end 

@ -91,7 +91,7 @@ module fcmp (
               `FMT2:
                  if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
                  else         NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-               default:        NaNRes = (`FLEN)'(0);
+               default:        NaNRes = {`FLEN{1'bx}};
            endcase

   else if (`FPSIZES == 4)
@ -112,16 +112,12 @@ module fcmp (
            endcase

 // when one input is a NaN -output the non-NaN
-   always_comb
-      case (FOpCtrlE[2:0])
-         3'b111: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
-                                 : YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
-         3'b101: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
-                                 : YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
-         3'b010: CmpResE = {(`FLEN-1)'(0), (EQ|BothZeroE) & ~EitherNaNE}; // Equal
-         3'b001: CmpResE = {(`FLEN-1)'(0), LT & ~BothZeroE & ~EitherNaNE}; // Less than
-         3'b011: CmpResE = {(`FLEN-1)'(0), (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
-         default: CmpResE = (`FLEN)'(0);
-      endcase
+   assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
+                                          : YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE : 
+                                    XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
+                                          : YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
+                                    
+
+   assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
   
 endmodule
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@ -10,99 +10,99 @@ module fctrl (
  output logic       IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
  output logic       FRegWriteD,  // FP register write enable
  output logic       FDivStartD,  // Start division or squareroot
-  output logic [1:0] FResultSelD, // select result to be written to fp register
+  output logic [1:0] FResSelD, // select result to be written to fp register
  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
-  output logic [1:0] FResSelD,    // select one of the results done in the memory stage
-  output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
+  output logic [1:0] PostProcSelD, 
  output logic [`FMTBITS-1:0] FmtD,        // precision - single-0 double-1
  output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
  output logic       FWriteIntD   // is the result written to the integer register
  );

-  `define FCTRLW 13
+  `define FCTRLW 11
  logic [`FCTRLW-1:0] ControlsD;
+  //*** will putting x for don't cares reduce area in synthesis???
  // FPU Instruction Decoder
  always_comb
    if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
-      ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
+      ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1;
    else case(OpD)
-    // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
+    // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
      7'b0000111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
-                    3'b011:  ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
-                    default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flw
+                    3'b011:  ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // fld
+                    default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
                  endcase
      7'b0100111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
-                    3'b011:  ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
-                    default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw
+                    3'b011:  ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd
+                    default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
                  endcase
-      7'b1000011:   ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
-      7'b1000111:   ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
-      7'b1001011:   ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
-      7'b1001111:   ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
+      7'b1000011:   ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
+      7'b1000111:   ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
+      7'b1001011:   ControlsD = `FCTRLW'b1_0_01_10_010_0_0; // fnmsub
+      7'b1001111:   ControlsD = `FCTRLW'b1_0_01_10_011_0_0; // fnmadd
      7'b1010011: casez(Funct7D)
-                    7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
-                    7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
-                    7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
-                    7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
-                    7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
+                    7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0; // fadd
+                    7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0; // fsub
+                    7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0; // fmul
+                    7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0; // fdiv
+                    7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0; // fsqrt
                    7'b00100??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
-                                  3'b010:  ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_00_xx_000_0_0; // fsgnj
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_00_xx_001_0_0; // fsgnjn
+                                  3'b010:  ControlsD = `FCTRLW'b1_0_00_xx_010_0_0; // fsgnjx
+                                  default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
                                endcase
                    7'b00101??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_00_xx_110_0_0; // fmin
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_00_xx_101_0_0; // fmax
+                                  default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
                                endcase
                    7'b10100??: case(Funct3D)
-                                  3'b010:  ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
-                                  3'b001:  ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
-                                  3'b000:  ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                                  3'b010:  ControlsD = `FCTRLW'b0_1_00_xx_010_0_0; // feq
+                                  3'b001:  ControlsD = `FCTRLW'b0_1_00_xx_001_0_0; // flt
+                                  3'b000:  ControlsD = `FCTRLW'b0_1_00_xx_011_0_0; // fle
+                                  default: ControlsD = `FCTRLW'b0_0_00_xx_0xx__0_1; // non-implemented instruction
                                endcase
-                    7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
-                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
-                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
-                                else                            ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
-                    7'b1101000: case(Rs2D[1:0])//***reduce resSel
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w   w->s
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l   l->s
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
+                    7'b11100??: if (Funct3D == 3'b001)          ControlsD = `FCTRLW'b0_1_10_xx_000_0_0; // fclass
+                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0; // fmv.x.w   to int reg
+                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0; // fmv.x.d   to int reg
+                                else                            ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
+                    7'b1101000: case(Rs2D[1:0])
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.s.w   w->s
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.s.wu wu->s
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.s.l   l->s
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.s.lu lu->s
                                endcase
                    7'b1100000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s   s->w
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s  s->wu
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s   s->l
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s  s->lu
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.s   s->w
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.s  s->wu
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.s   s->l
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.s  s->lu
                                endcase
-                    7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
-                    7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
+                    7'b1111000: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0; // fmv.w.x   to fp reg
+                    7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0; // fcvt.s.d
                    7'b1101001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w   w->d
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l   l->d
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.d.w   w->d
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.d.wu wu->d
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.d.l   l->d
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.d.lu lu->d
                                endcase
                    7'b1100001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d   d->w
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d  d->wu
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d   d->l
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d  d->lu
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.d   d->w
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.d  d->wu
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.d   d->l
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.d  d->lu
                                endcase
-                    7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
-                    7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
-                    default:    ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                    7'b1111001: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0; // fmv.d.x   to fp reg
+                    7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0; // fcvt.d.s
+                    default:    ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
                  endcase
-      default:      ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+      default:      ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
    endcase

  // unswizzle control bits
-  assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
+  assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
  
  // rounding modes:
  //    000 - round to nearest, ties to even
@ -121,82 +121,61 @@ module fctrl (
      assign FmtD = 0;
    else if (`FPSIZES == 2)begin
      logic [1:0] FmtTmp;
-      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
      assign FmtD = (`FMT == FmtTmp);
    end
    else if (`FPSIZES == 3|`FPSIZES == 4)
-      assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];

-      // assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
-  // FResultSel:
-  //    000 - ReadRes - load
-  //    001 - FMARes  - FMA and multiply
-  //    010 - FAddRes - add and fp to fp
-  //    011 - FDivRes - divide and squareroot
-  //    100 - FRes    - anything that is written to the fp register and is ready in the memory stage
-  //        FResSel:
-  //            00 - SrcA   - move to fp register 
-  //            01 - SgnRes - sign injection
-  //            10 - CmpRes - min/max
-  //            11 - CvtRes - convert to fp
-  
-  // FIntResSel:
-  //    00 - CmpRes   - less than, equal, or less than or equal 
-  //    01 - FSrcX    - move to int register
-  //    10 - ClassRes - classify
-  //    11 - CvtRes   - convert to signed/unsigned int
+//  Final Res Sel:
+//        fp      int
+//  00  other     cmp
+//  01  postproc  cvt
+//  10  store     class
+//  11            mv

-  // OpCtrl values: 
-  // div/sqrt
-      //  fdiv  = ???0
-      //  fsqrt = ???1
+//  post processing Sel:
+//  00  cvt
+//  01  div
+//  10  fma

-  // cmp		
-      //  fmin = ?111
-      //  fmax = ?101
-      //  feq  = ?010
-      //  flt  = ?001
-      //  fle  = ?011
-      //  {?,  is min or max,   is eq or le,   is lt or le}
+//  Other Sel:
+//    Ctrl signal = {FOpCtrl[2], &FOpCtrl[1:0]}
+//        000 - sign            00
+//        001 - negate sign     00
+//        010 - xor sign        00
+//        011 - mv to fp        01
+//        110 - min             10
+//        101 - max             10

-  //fma/mult	
-      //  fmadd  = ?000
-      //  fmsub  = ?001
-      //  fnmsub = ?010	-(a*b)+c
-      //  fnmadd = ?011 -(a*b)-c
-      //  fmul   = ?100
-      //	{?, is mul, negate product, negate addend}
-
-  // sgn inj
-      //  fsgnj  = ??00
-      //  fsgnjn = ??01
-      //  fsgnjx = ??10
-
-  // add/sub/cnvt
-      //  fadd      = 0000
-      //  fsub      = 0001
-      //  fcvt.s.d  = 0111
-      //  fcvt.d.s  = 0111
-      //  Fmt controls the output for fp -> fp
-      
-  // convert
-      //  fcvt.w.s  = 0010
-      //  fcvt.wu.s = 0110
-      //  fcvt.s.w  = 0001
-      //  fcvt.s.wu = 0101
-      //  fcvt.l.s  = 1010
-      //  fcvt.lu.s = 1110
-      //  fcvt.s.l  = 1001
-      //  fcvt.s.lu = 1101
-      //  fcvt.w.d  = 0010 
-      //  fcvt.wu.d = 0110
-      //  fcvt.d.w  = 0001
-      //  fcvt.d.wu = 0101
-      //  fcvt.l.d  = 1010
-      //  fcvt.lu.d = 1110
-      //  fcvt.d.l  = 1001
-      //  fcvt.d.lu = 1101
-      //  {long, unsigned, to int, from int}
+//  OpCtrl:
+//    Fma: {not multiply-add?, negate prod?, negate Z?}
+//        000 - fmadd
+//        001 - fmsub
+//        010 - fnmsub
+//        011 - fnmadd
+//        100 - mul
+//        110 - add
+//        111 - sub
+//    Div: 
+//        0 - ???
+//        1 - ???
+//    Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
+//    Cvt Fp: output format
+//        10 - to half
+//        00 - to single
+//        01 - to double
+//        11 - to quad
+//    Cmp: {min/max?, equal?, less than?} (for min/max, bit 0 selects max)
+//        010 - eq
+//        001 - lt
+//        011 - le
+//        110 - min
+//        101 - max
+//    Sgn:
+//        00 - sign
+//        01 - negate sign
+//        10 - xor sign
    

 endmodule
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@ -1,8 +1,5 @@

 `include "wally-config.vh"
-// largest length in IEU/FPU
-`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
-`define LOGLGLEN $unsigned($clog2(`LGLEN+1))

 module fcvt (
    input logic             XSgnE,          // input's sign
@ -13,14 +10,13 @@ module fcvt (
    input logic             FWriteIntE,     // is fp->int (since it's writting to the integer register)
    input logic             XZeroE,         // is the input zero
    input logic             XDenormE,   // is the input denormalized
-    input logic             XInfE,          // is the input infinity
-    input logic             XNaNE,          // is the input a NaN
-    input logic             XSNaNE,         // is the input a signaling NaN
-    input logic [2:0]       FrmE,           // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic [`FMTBITS-1:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
-    output logic [`FLEN-1:0] CvtResE,       // the fp conversion result
-    output logic [`XLEN-1:0] CvtIntResE,    // the int conversion result
-    output logic [4:0]      CvtFlgE         // the conversion's flags
+    output logic [`NE:0]           CvtCalcExpE,    // the calculated expoent
+	output logic [`LOGLGLEN-1:0] CvtShiftAmtE,  // how much to shift by
+    output logic                   CvtResDenormUfE,// does the result underflow or is denormalized
+    output logic                   CvtResSgnE,     // the result's sign
+    output logic                   IntZeroE,      // is the integer zero?
+    output logic [`LGLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
    );

    // OpCtrls:
@ -41,34 +37,8 @@ module fcvt (
    logic [`FMTBITS-1:0]    OutFmt;     // format of the output
    logic [`XLEN-1:0]       PosInt;     // the positive integer input
    logic [`XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
-    logic [`LGLEN-1:0]      LzcIn;      // input to the Leading Zero Counter (priority encoder)
-    logic [`NE:0]           CalcExp;    // the calculated expoent
-	logic [`LOGLGLEN-1:0] ShiftAmt;  // how much to shift by
-    logic [`LGLEN+`NF:0]    ShiftIn;    // number to be shifted
-    logic                   ResDenormUf;// does the result underflow or is denormalized
-    logic                   ResUf;      // does the result underflow
-    logic [`LGLEN+`NF:0]    Shifted;    // the shifted result
    logic [`NE-2:0]         NewBias;    // the bias of the final result
-    logic [$clog2(`NF):0]	ResNegNF;   // the result's fraction length negated (-NF)
    logic [`NE-1:0]	        OldExp;     // the old exponent
-    logic                   ResSgn;     // the result's sign
-    logic                   Sticky;     // sticky bit - for rounding
-    logic                   Round;      // round bit - for rounding
-    logic                   LSBFrac;    // the least significant bit of the fraction - for rounding
-    logic                   CalcPlus1;  // the calculated plus 1
-    logic                   Plus1;      // add one to the final result?
-    logic [`FLEN-1:0]       ShiftedPlus1;   // plus one shifted to the proper position
-    logic [`NE:0]           FullResExp; // the full result exponent (with the overflow bit) 
-    logic [`NE-1:0]         ResExp;     // the result's exponent (trimmed to the correct size)
-    logic [`NF-1:0]         ResFrac;    // the result's fraction
-    logic [`XLEN+1:0]       NegRes;     // the negation of the result
-    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
-    logic                   Overflow, Underflow, Inexact, Invalid; // flags
-    logic                   IntInexact, FpInexact, IntInvalid, FpInvalid;   // flags for FP and int outputs
-    logic [`NE-1:0]         MaxExp;         // the maximum exponent before overflow
-    logic [1:0]             NegResMSBS;     // the negitive integer result's most significant bits
-    logic [`FLEN-1:0]       NaNRes, InfRes, Res, UfRes; //various special results
-    logic                   KillRes;    // kill the result?
    logic                   Signed;     // is the opperation with a signed integer?
    logic                   Int64;      // is the integer 64 bits?
    logic                   IntToFp;       // is the opperation an int->fp conversion?
@ -97,8 +67,9 @@ module fcvt (
    // 1) negate the input if the input is a negitive singed integer
    // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)

-    assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
+    assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE;
    assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
+    assign IntZeroE = ~|TrimInt;

    ///////////////////////////////////////////////////////////////////////////
    // lzc 
@ -107,32 +78,16 @@ module fcvt (
    // choose the input to the leading zero counter i.e. priority encoder
    //             int -> fp : | positive integer | 00000... (if needed) | 
    //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
+    assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
    
-    lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
-
+    lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);

    ///////////////////////////////////////////////////////////////////////////
    // shifter
    ///////////////////////////////////////////////////////////////////////////

-    // seclect the input to the shifter
-    //      fp  -> int:
-    //          |  `XLEN  zeros |     Mantissa      | 0's if nessisary |
-    //          Other problems:
-    //              - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
-    //              - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
-    //                  - ex: for the case 0010000.... (double)
-    //      ??? -> fp:
-    //          - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
-    //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | 
-    //          - otherwise:
-    //              |     lzcIn      | 0's if nessisary | 
-    assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
-                     ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} : 
-                                   {LzcIn, {`NF+1{1'b0}}};
-// kill the shift if it's negitive
+    // kill the shift if it's negative
    // select the amount to shift by
    //      fp -> int: 
    //          - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
@ -144,47 +99,10 @@ module fcvt (
    //              - only shift fp -> fp if the intital value is denormalized
    //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
    //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
-                    ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] : 
+    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
+                    CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : 
                              (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
    
-    // shift
-    //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if nessisary | << CalcExp
-    //          process:
-    //              - start - CalcExp = 1 + XExp - Largest Bias
-    //                  |  `XLEN  zeros     |     Mantissa      | 0's if nessisary |
-    //
-    //              - shift left 1 (1)
-    //                  | `XLEN-1 zeros |bit|     frac      | 0's if nessisary |
-    //                                      . <- binary point
-    //
-    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
-    //                  |  0's |     Mantissa      |      0's if nessisary     |
-    //                  |     keep          |
-    //
-    //      fp -> fp:
-    //          - if result is denormalized or underflowed:
-    //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | << NF+CalcExp-1
-    //          process:
-    //             - start
-    //                 |     mantissa      | 0's |
-    //
-    //             - shift right by NF-1 (NF-1)
-    //                 |  `NF-1  zeros   |     mantissa      | 0's |
-    //
-    //             - shift left by CalcExp = XExp - Largest bias + new bias
-    //                 |   0's  |     mantissa      |     0's      |
-    //                 |       keep      |
-    //
-    //          - if the input is denormalized:
-    //              |     lzcIn      | 0's if nessisary | << ZeroCnt+1
-    //              - plus 1 to shift out the first 1
-    //
-    //      int -> fp: |     lzcIn      | 0's if nessisary | << ZeroCnt+1
-    //              - plus 1 to shift out the first 1
-
-    assign Shifted = ShiftIn << ShiftAmt;
-
    ///////////////////////////////////////////////////////////////////////////
    // exp calculations
    ///////////////////////////////////////////////////////////////////////////
@ -215,7 +133,7 @@ module fcvt (
                `FMT: NewBiasToFp =  (`NE-1)'(`BIAS);
                `FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
                `FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
-                default: NewBiasToFp = 1'bx;
+                default: NewBiasToFp = {`NE-1{1'bx}};
            endcase
        assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 

@ -262,40 +180,11 @@ module fcvt (
    //                  - shift left to normilize (-1-ZeroCnt)
    //                  - newBias to make the biased exponent
    //          
-    assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
+    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
    // find if the result is dnormal or underflows
    //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
    //      - can't underflow an integer to Fp conversion
-    assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
-    // choose the negative of the fraction size
-    if (`FPSIZES == 1) begin
-        assign ResNegNF = -($clog2(`NF)+1)'(`NF); 
-
-    end else if (`FPSIZES == 2) begin
-        assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT:  ResNegNF = -($clog2(`NF)+1)'(`NF);
-                `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
-                `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
-                default: ResNegNF = 1'bx;
-            endcase
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (OutFmt)
-                2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
-                2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
-                2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
-                2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
-            endcase
-    end
-    // determine if the result underflows ??? -> fp
-    //      - if the first 1 is shifted out of the result then the result underflows
-    //      - can't underflow an integer to fp conversions
-    assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
+    assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp;

    
    ///////////////////////////////////////////////////////////////////////////
@ -307,498 +196,7 @@ module fcvt (
    //          - if 64-bit : check the msb of the 64-bit integer input and if it's signed
    //          - if 32-bit : check the msb of the 32-bit integer input and if it's signed
    //      - otherwise: the floating point input's sign
-    assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
+    assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;

-    ///////////////////////////////////////////////////////////////////////////
-    // rounding
-    ///////////////////////////////////////////////////////////////////////////
+endmodule

-    // round to nearest even
-    //      {Round, Sticky}
-    //      0x - do nothing
-    //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
-    //      11 - Plus1
-
-    //  round to zero - do nothing
-
-    //  round to -infinity - Plus1 if negative
-
-    //  round to infinity - Plus1 if positive
-
-    //  round to nearest max magnitude
-    //      {Guard, Round, Sticky}
-    //      0x - do nothing
-    //      1x - Plus1
-    // ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
-    if (`FPSIZES == 1) begin
-        assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
-        assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
-        assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
-
-    end else if (`FPSIZES == 2) begin    
-        assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : 
-                        (OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
-        assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : 
-                        OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
-        assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : 
-                        OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
-
-    end else if (`FPSIZES == 3) begin
-        logic ToFpSticky, ToFpRound, ToFpLSBFrac;
-        always_comb
-            case (OutFmt)
-                `FMT:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`NF-`NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
-                end
-                `FMT1:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`NF-`NF1];
-                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
-                end
-                `FMT2:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`NF-`NF2];
-                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
-                end
-                default:  begin 
-                     ToFpSticky = 1'bx;
-                     ToFpRound = 1'bx;
-                     ToFpLSBFrac = 1'bx;
-                end
-            endcase
-            assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
-            assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
-            assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
-
-    end else if (`FPSIZES == 4) begin        
-        logic ToFpSticky, ToFpRound, ToFpLSBFrac;
-        always_comb
-            case (OutFmt)
-                2'h3:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`Q_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
-                end
-                2'h1:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`D_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
-                end
-                2'h0:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`S_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
-                end
-                2'h2:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`H_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
-                end
-            endcase
-            assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
-            assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
-            assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
-    end
-
-    always_comb
-        // Determine if you add 1
-        case (FrmE)
-            3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
-            3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = ResSgn;//round down
-            3'b011: CalcPlus1 = ~ResSgn;//round up
-            3'b100: CalcPlus1 = Round;//round to nearest max magnitude
-            default: CalcPlus1 = 1'bx;
-        endcase
-
-    // dont round if exact
-    assign Plus1 = CalcPlus1&(Round|Sticky);
-
-    // shift the 1 to the propper position for rounding
-    //     - dont round it converting to integer
-    if (`FPSIZES == 1) begin
-        assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
-
-    end else if (`FPSIZES == 2) begin
-        assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT:  ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
-                `FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
-                `FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
-                default: ShiftedPlus1 = 0;
-            endcase
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (OutFmt)
-                2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
-                2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
-                2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
-                2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
-            endcase
-    end
-    // kill calcExp if the result is denormalized
-    assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
-    // trim the result's expoent to size
-    assign ResExp = FullResExp[`NE-1:0];
-    ///////////////////////////////////////////////////////////////////////////
-    // flags
-    ///////////////////////////////////////////////////////////////////////////
-    
-    // calculate the flags
-
-    // find the maximum exponent (the exponent and larger overflows)
-    if (`FPSIZES == 1) begin
-        assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
-
-    end else if (`FPSIZES == 2) begin    
-        assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
-                OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
-
-    end else if (`FPSIZES == 3) begin
-        logic [`NE-1:0] MaxExpFp;
-        always_comb
-            case (OutFmt)
-                `FMT:  begin 
-                     MaxExpFp = {`NE{1'b1}};
-                end
-                `FMT1:  begin 
-                     MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
-                end
-                `FMT2:  begin 
-                     MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
-                end
-                default:  begin 
-                     MaxExpFp = 1'bx;
-                end
-            endcase
-            assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
-
-    end else if (`FPSIZES == 4) begin        
-        logic [`NE-1:0] MaxExpFp;
-        always_comb
-            case (OutFmt)
-                2'h3:  begin 
-                     MaxExpFp = {`Q_NE{1'b1}};
-                end
-                2'h1:  begin 
-                     MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
-                end
-                2'h0:  begin 
-                     MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
-                end
-                2'h2:  begin 
-                     MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
-                end
-            endcase
-            assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
-    end
-
-    //                 if the result exponent is larger then the maximum possible exponent
-    //                 |                  and the exponent is positive
-    //                 |                  |             and the input is not NaN or Infinity
-    //                 |                  |             |
-    assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
-
-    //                 if the result is denormalized or underflowed
-    //                 |             and the result did not round into normal values
-    //                 |             |                             and the result is not exact
-    //                 |             |                             |              and the result isn't NaN
-    //                 |             |                             |              |
-    assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
-
-    // we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
-    //                  if there were bits thrown away
-    //                  |            if overflowed or underflowed
-    //                  |            |                    and if not a NaN
-    //                  |            |                    |
-    assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
-
-    //                  if the result is too small to be represented and not 0
-    //                  |                                     and if the result is not invalid (outside the integer bounds)
-    //                  |                                     |
-    assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
-
-    // select the inexact flag to output
-    assign Inexact = ToInt ? IntInexact : FpInexact;
-
-    //                  if an input was a singaling NaN(and we're using a FP input)
-    //                  |
-    assign FpInvalid = (XSNaNE&~IntToFp);
-
-    assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
-			              Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
-    //                  if the input is NaN or infinity
-    //                  |           if the integer result overflows (out of range) 
-    //                  |           |         if the input was negitive but ouputing to a unsigned number
-    //                  |           |         |                    the result doesn't round to zero
-    //                  |           |         |                    |               or the result rounds up out of bounds
-    //                  |           |         |                    |                       and the result didn't underflow
-    //                  |           |         |                    |                       |
-    assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
-    //                                                                                                     |
-    //                                                                                                     or when the positive result rounds up out of range
-    // select the inexact flag to output
-    assign Invalid = ToInt ? IntInvalid : FpInvalid;
-    // pack the flags together
-    //      - fp -> int does not set the overflow or underflow flags
-    assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
-
-
-    ///////////////////////////////////////////////////////////////////////////
-    // result selection
-    ///////////////////////////////////////////////////////////////////////////
-
-    // determine if you shoould kill the result
-    //      - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
-    //      - dont set to zero if fp input is zero but not using the fp input
-    //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
-
-    if (`FPSIZES == 1) begin        
-        // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-        if(`IEEE754) begin
-            assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
-        end else begin 
-            assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
-        end
-        // determine the infinity result
-        //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-        //      - otherwise: output infinity with the correct sign
-        //      - kill the infinity singal if the input isn't fp
-        assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-
-        // result for when the result is killed i.e. underflowes
-        //      - output a rounded 0 with the correct sign
-        assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
-
-        // format the result - NaN box single precision (put 1's in the unused msbs)
-        assign Res   = {ResSgn, ResExp, ResFrac};
-
-
-    end else if (`FPSIZES == 2) begin
-        // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-        if(`IEEE754) begin
-            assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
-        end else begin 
-            assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
-        end
-        // determine the infinity result
-        //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-        //      - otherwise: output infinity with the correct sign
-        //      - kill the infinity singal if the input isn't fp
-        assign InfRes =  OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                                        {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                                 (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
-                                                                                                                                        {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-
-        // result for when the result is killed i.e. underflowes
-        //      - output a rounded 0 with the correct sign
-        assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
-
-        // format the result - NaN box single precision (put 1's in the unused msbs)
-        assign Res   = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
-                    end else begin 
-                        NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {ResSgn, ResExp, ResFrac};
-                end
-                `FMT1: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
-                    end else begin 
-                        NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
-                end
-                `FMT2: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
-                    end else begin 
-                        NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
-                end
-                default: begin
-                    NaNRes = 1'bx;
-                    InfRes = 1'bx;
-                    UfRes  = 1'bx;
-                    Res    = 1'bx;
-                end
-            endcase
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (OutFmt)
-                2'h3: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
-                    end else begin 
-                        NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {ResSgn, ResExp, ResFrac};
-                end
-                2'h1: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
-                    end else begin 
-                        NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
-                end
-                2'h0: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
-                    end else begin 
-                        NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
-                end
-                2'h2: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
-                    end else begin 
-                        NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
-                end
-            endcase
-    end
-
-    
-    // choose the floating point result
-    //      - if the input is NaN (and using the NaN input) output the NaN result
-    //      - if the input is infinity or the output overflows
-    //      - kill the InfE signal if the input isn't a floating point value
-    //      - if killing the result output the underflow result
-    //      - otherwise output the normal result
-    assign CvtResE = XNaNE&~IntToFp ? NaNRes : 
-                     (XInfE&~IntToFp)|Overflow ? InfRes :
-                     KillRes ? UfRes :
-                     Res;
-    // *** probably can optimize the negation
-    // select the overflow integer result
-    //      - negitive infinity and out of range negitive input
-    //                 |  int  |  long  |
-    //          signed | -2^31 | -2^63  |
-    //        unsigned |   0   |    0   |
-    //
-    //      - positive infinity and out of range negitive input and NaNs
-    //                 |   int  |  long  |
-    //          signed | 2^31-1 | 2^63-1 |
-    //        unsigned | 2^32-1 | 2^64-1 |
-    //
-    //      other: 32 bit unsinged result should be sign extended as if it were a signed number
-    assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
-                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
-                               XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
-                                              {`XLEN{1'b1}};// unsigned positive
-    
-    // round and negate the positive result if needed
-    assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
-    // select the integer output
-    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow result
-    //      - if the input underflows
-    //          - if rounding and signed opperation and negitive input, output -1
-    //          - otherwise output a rounded 0
-    //      - otherwise output the normal result (trmined and sign extended if nessisary)
-    assign CvtIntResE = Invalid ?  OfIntRes :
-			            CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
-                        Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
-
-endmodule
--- a/pipelined/src/fpu/fhazard.sv
+++ b/pipelined/src/fpu/fhazard.sv
@ -34,7 +34,7 @@ module fhazard(
    input logic [4:0]   Adr1E, Adr2E, Adr3E,    // read data adresses
    input logic         FRegWriteM, FRegWriteW, // is the fp register being written to
 	  input logic [4:0]   RdM, RdW,               // the adress being written to
-    input logic [1:0]   FResultSelM,            // the result being selected
+    input logic [1:0]   FResSelM,            // the result being selected
    output logic        FStallD,                // stall the decode stage
    output logic [1:0]  FForwardXE, FForwardYE, FForwardZE // select a forwarded value
 );
@ -47,10 +47,12 @@ module fhazard(
    FForwardZE = 2'b00; // choose FRD3E
    FStallD = 0;

+    //*** this hazard unit waits on all three inputs; change it so that an input the instruction doesn't use is not waited on
+
    // if the needed value is in the memory stage - input 1
    if ((Adr1E == RdM) & FRegWriteM) 
      // if the result will be FResM (can be taken from the memory stage)
-      if(FResultSelM == 2'b11) FForwardXE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
      else FStallD = 1;                             // otherwise stall
    // if the needed value is in the writeback stage
    else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
@ -59,7 +61,7 @@ module fhazard(
    // if the needed value is in the memory stage - input 2
    if ((Adr2E == RdM) & FRegWriteM)
      // if the result will be FResM (can be taken from the memory stage)
-      if(FResultSelM == 2'b11) FForwardYE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
      else FStallD = 1;                             // otherwise stall
    // if the needed value is in the writeback stage
    else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
@ -68,7 +70,7 @@ module fhazard(
    // if the needed value is in the memory stage - input 3
    if ((Adr3E == RdM) & FRegWriteM)
      // if the result will be FResM (can be taken from the memory stage)
-      if(FResultSelM == 2'b11) FForwardZE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
      else FStallD = 1;                             // otherwise stall
    // if the needed value is in the writeback stage
    else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@ -0,0 +1,147 @@
+`include "wally-config.vh"
+
+module flags(
+    input logic                 XSgnM,                  // sign of the X input
+    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
+    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                 Plus1,                  // rounding increment (used by the integer invalid check)
+    input logic                 InfIn,                  // is a Inf input being used
+    input logic                 XZeroM, YZeroM,         // inputs are zero
+    input logic                 XNaNM, YNaNM,           // inputs are NaN
+    input logic                 NaNIn,                  // is a NaN input being used
+    input logic                 Sqrt,                   // square root operation?
+    input logic                 ToInt,                  // convert to integer
+    input logic                 IntToFp,                // convert integer to floating point
+    input logic                 Int64,                  // convert to 64 bit integer
+    input logic                 Signed,                 // convert to a signed integer
+    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
+    input logic [`NE:0]         CvtCalcExpM,            // the calculated exponent - Cvt
+    input logic                 CvtOp,                  // conversion operation?
+    input logic                 DivOp,                  // division operation? (sqrt presumably included, see DivInvalid)
+    input logic                 FmaOp,                  // Fma operation?
+    input logic  [`NE+1:0]      FullResExp,             // ResExp with bits to determine sign and overflow
+    input logic  [`NE+1:0]      RoundExp,               // exponent of the normalized sum
+    input logic  [1:0]          NegResMSBS,             // the negative integer result's most significant bits
+    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
+    input logic                 Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
+    output logic                IntInvalid, Invalid, Overflow, Underflow, // flags used to select the res
+    output logic [4:0]          PostProcFlgM // flags: {invalid, divide by zero, overflow, underflow, inexact}
+);
+    logic               SigNaN;     // is an input a signaling NaN
+    logic               Inexact;    // inexact flag
+    logic               FpInexact;  // floating point inexact flag
+    logic               IntInexact; // integer inexact flag
+    logic               FmaInvalid; // fma invalid flag
+    logic               DivInvalid; // divide/square root invalid flag
+    logic               DivByZero;  // divide by zero flag
+    logic               ResExpGteMax; // is the result greater than or equal to the maximum floating point exponent
+    logic               ShiftGtIntSz; // is the shift greater than the integer size (use ResExp to account for possible rounding "shift")
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Flags
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // ResExpGteMax is selected per output format; ShiftGtIntSz uses the same expression for every FPSIZES
+    // configuration (only the top exponent index macro differs when FPSIZES == 4)
+   if (`FPSIZES == 1) begin
+        assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
+        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+
+    end else if (`FPSIZES == 2) begin
+        assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
+
+        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
+                `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
+                `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
+                default: ResExpGteMax = 1'bx;
+            endcase
+            assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+
+    end else if (`FPSIZES == 4) begin
+        always_comb
+            case (OutFmt)
+                `Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
+                `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
+                `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
+                `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
+            endcase
+            // a left shift of intlen+1 is still in range but any more than that is an overflow
+            //          initial: |      64 0's         |    XLEN     |
+            //                   |      64 0's         |    XLEN     | << 64
+            //                   |      XLEN           |    00000... |
+            // 65 = ...0 0 0 0   0 1 0 0   0 0 0 1
+            //      |     or      | |     or      |
+            // 33 = ...0 0 0 0   0 0 1 0   0 0 0 1
+            //      |     or        | |     or    |
+            // larger or equal if:
+            //      - any of the bits after the most significant 1 is one
+            //      - the most significant in 65 or 33 is still a one in the number and
+            //        one of the later bits is one
+            assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+    end
+
+    //                 if the result is greater than or equal to the max exponent(not taking into account sign)
+    //                 |           and the exponent isn't negative
+    //                 |           |                   if the input isnt infinity or NaN
+    //                 |           |                   |            
+    assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
+
+    // detecting tininess after rounding
+    //                  the exponent is negative
+    //                  |                    the result is denormalized
+    //                  |                    |                    the result is normal and rounded from a denorm
+    //                  |                    |                    |                                      and if given an unbounded exponent the result does not round
+    //                  |                    |                    |                                      |                     and if the result is not exact
+    //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
+    //                  |                    |                    |                                      |                     |               |
+    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
+
+    // Set Inexact flag if the res is different from what would be output given infinite precision
+    //      - Don't set the underflow flag if an underflowed res isn't output
+    assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
+
+    //                  if the res is too small to be represented and not 0
+    //                  |                                     and if the res is not invalid (outside the integer bounds)
+    //                  |                                     |
+    assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~IntInvalid;
+
+    // select the inexact flag to output
+    assign Inexact = ToInt ? IntInexact : FpInexact;
+
+    // Set Invalid flag for following cases:
+    //   1) any input is a signaling NaN
+    //   2) Inf - Inf (unless x or y is NaN)
+    //   3) 0 * Inf
+
+    //                  if the input is NaN or infinity
+    //                  |           if the integer res overflows (out of range) 
+    //                  |           |                                  if the input was negative but outputting to a unsigned number
+    //                  |           |                                  |                    the res doesn't round to zero
+    //                  |           |                                  |                    |               or the res rounds up out of bounds
+    //                  |           |                                  |                    |                       and the res didn't underflow
+    //                  |           |                                  |                    |                       |
+    assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
+    //                                                                                                     |
+    //                                                                                                     or when the positive res rounds up out of range
+    assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
+    assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
+    assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&~XZeroM&~XNaNM&Sqrt); // Inf/Inf, 0/0, or sqrt of a negative number (sqrt(-0) and sqrt of a negative NaN are not invalid)
+
+    assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
+
+    // divide by zero only applies to a finite non-zero dividend: 0/0 is invalid instead, Inf/0 and NaN/0 raise no divide by zero, and sqrt never divides
+    assign DivByZero = YZeroM&DivOp&~Sqrt&~(XZeroM|XInfM|XNaNM);
+
+    // Combine flags
+    //      - to integer results do not set the underflow or overflow flags
+    assign PostProcFlgM = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
+
+endmodule
+
+
+
+
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -30,73 +30,6 @@
 `include "wally-config.vh"

 module fma(
-    input logic                 clk,
-    input logic                 reset,
-    input logic                 FlushM,     // flush the memory stage
-    input logic                 StallM,     // stall memory stage
-    input logic  [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
-    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
-    input logic  [2:0]          FrmM,               // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 XSgnE, YSgnE, ZSgnE,    // input signs - execute stage
-    input logic [`NE-1:0]       XExpE, YExpE, ZExpE,    // input exponents - execute stage
-    input logic [`NF:0]         XManE, YManE, ZManE,    // input mantissa - execute stage
-    input logic                 XSgnM, YSgnM,           // input signs - memory stage
-    input logic [`NE-1:0]       ZExpM,    // input exponents - memory stage
-    input logic [`NF:0]         XManM, YManM, ZManM,    // input mantissa - memory stage
-    input logic                 ZDenormE, // is denorm
-    input logic                 XZeroE, YZeroE, ZZeroE,     // is zero - execute stage
-    input logic                 XNaNM, YNaNM, ZNaNM,        // is NaN
-    input logic                 XSNaNM, YSNaNM, ZSNaNM,     // is signaling NaN
-    input logic                 XZeroM, YZeroM, ZZeroM,     // is zero - memory stage
-    input logic                 XInfM, YInfM, ZInfM,        // is infinity
-	output logic [`FLEN-1:0]    FMAResM,    // FMA result
-	output logic [4:0]		    FMAFlgM);   // FMA flags
-	
-  //fma/mult/add	
-      //  fmadd  = 000
-      //  fmsub  = 001
-      //  fnmsub = 010	-(a*b)+c
-      //  fnmadd = 011  -(a*b)-c
-      //  fmul   = 100
-      //  fadd   = 110
-      //  fsub   = 111
-
-    // signals transfered between pipeline stages
-    logic [3*`NF+5:0]	SumE, SumM;                       
-    logic [`NE+1:0]	    ProdExpE, ProdExpM;
-    logic 			    AddendStickyE, AddendStickyM;
-    logic 			    KillProdE, KillProdM;
-    logic 			    InvZE, InvZM;
-    logic 			    NegSumE, NegSumM;
-    logic 			    ZSgnEffE, ZSgnEffM;
-    logic 			    PSgnE, PSgnM;
-    logic [$clog2(3*`NF+7)-1:0]			NormCntE, NormCntM;
-    logic               Mult;
-    logic               ZDenormM;
-    
-    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-                .XZeroE, .YZeroE, .ZZeroE,
-                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
-                .ProdExpE, .AddendStickyE, .KillProdE); 
-                
-    // E/M pipeline registers
-    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
-    flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-    flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
-                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
-
-    fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
-            .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
-            .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
-            .FMAResM, .FMAFlgM);
-
-endmodule
-      
-
-        //*** in al units before putting into : ? put in a seperate signal
-
-module fma1(
    input logic                 XSgnE, YSgnE, ZSgnE,    // input's signs
    input logic  [`NE-1:0]      XExpE, YExpE, ZExpE,    // biased exponents in B(NE.0) format
    input logic  [`NF:0]        XManE, YManE, ZManE,    // fractions in U(0.NF) format
@ -111,7 +44,7 @@ module fma1(
    output logic                InvZE,          // intert Z
    output logic                ZSgnEffE,       // the modified Z sign
    output logic                PSgnE,          // the product's sign
-    output logic [$clog2(3*`NF+7)-1:0]          NormCntE        // normalization shift cnt
+    output logic [$clog2(3*`NF+7)-1:0]          FmaNormCntE        // normalization shift cnt
    );

    logic [2*`NF+1:0]   ProdManE;           // 1.X frac * 1.Y frac in U(2.2Nf) format
@ -151,7 +84,7 @@ module fma1(
        
    add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
    
-    loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
+    loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE);

    // Choose the positive sum and accompanying LZA result.
    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
@ -332,7 +265,7 @@ endmodule
 module loa( //https://ieeexplore.ieee.org/abstract/document/930098
    input logic  [3*`NF+6:0] A,     // addend
    input logic  [2*`NF+1:0] P,     // product
-    output logic [$clog2(3*`NF+7)-1:0]       NormCntE   // normalization shift count for the positive result
+    output logic [$clog2(3*`NF+7)-1:0]       FmaNormCntE   // normalization shift count for the positive result
    ); 
    
    logic [3*`NF+6:0] T;
@ -360,861 +293,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098



-    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
+    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
  
 endmodule
-
-
-
-
-
-
-
-
-
-module fma2( // FMA back end: normalizes the sum, rounds it, picks the sign, computes flags and selects the final result
-    
-    input logic                             XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]               ZExpM, // Z input exponent
-    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]                   FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic     [`FMTBITS-1:0]          FmtM,       // precision 1 = double 0 = single
-    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
-    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
-    input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic     [3*`NF+5:0]             SumM,       // the positive sum
-    input logic                             NegSumM,    // was the sum negative
-    input logic                             InvZM,      // do you invert Z
-    input logic                             ZDenormM, // is the original precision denormalized
-    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                             PSgnM,      // the product's sign
-    input logic                             Mult,       // multiply operation
-    input logic     [$clog2(3*`NF+7)-1:0]   NormCntM,   // the normalization shift count
-    output logic    [`FLEN-1:0]             FMAResM,    // FMA final result
-    output logic    [4:0]                   FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
-   
-
-
-    logic [`NF-1:0]     ResultFrac; // Result fraction
-    logic [`NE-1:0]     ResultExp;  // Result exponent
-    logic               ResultSgn, ResultSgnTmp;  // Result sign (final, and before the zero-sum special case)
-    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
-    logic [`NE+1:0]     FullResultExp;  // ResultExp with bits to determine sign and overflow
-    logic [`NF+1:0]     NormSum;        // normalized sum
-    logic               NormSumSticky;  // sticky bit calculated from the normalized sum
-    logic               SumZero;        // is the sum zero
-    logic               ResultDenorm;   // is the result denormalized
-    logic               Sticky, UfSticky;           // Sticky bit
-    logic               CalcPlus1;                  // do you add or subtract one for rounding
-    logic               UfPlus1;                    // do you add one (for determining underflow flag)
-    logic               Invalid,Underflow,Overflow; // flags
-    logic               Guard, Round;   // bits needed to determine rounding
-    logic               UfLSBNormSum;   // bits needed to determine rounding for underflow flag
-    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
-   
-    
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Normalization
-    ///////////////////////////////////////////////////////////////////////////////
-
-    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum, 
-            .ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Rounding
-    ///////////////////////////////////////////////////////////////////////////////
-
-    // round to nearest even
-    // round to zero
-    // round to -infinity
-    // round to infinity
-    // round to nearest max magnitude
-
-    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
-        .CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
-
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Sign calculation
-    ///////////////////////////////////////////////////////////////////////////////
-
- 
-    resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .Mult, .ResultSgnTmp, .ResultSgn);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Flags
-    ///////////////////////////////////////////////////////////////////////////////
-
-    fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
-        .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfLSBNormSum, .Sticky, .UfPlus1,
-        .FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Select the result
-    ///////////////////////////////////////////////////////////////////////////////
-
-    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
-        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
-        .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
-        .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
-
-// *** use NF where needed
-
-endmodule
-
-module resultsign( // picks the sign of the FMA result, with a separate rule for an exactly-zero sum
-    input logic [2:0]   FrmM,           // rounding mode
-    input logic         PSgnM, ZSgnEffM, // product sign and effective (modified) Z sign
-    input logic         Underflow,      // underflow indication
-    input logic         InvZM,          // Z was inverted before the addition
-    input logic         NegSumM,        // the sum was negative
-    input logic         SumZero,        // the sum is zero
-    input logic         Mult,           // multiply operation
-    output logic        ResultSgnTmp,   // result sign before the zero-sum override
-    output logic        ResultSgn       // final result sign
-);
-
-    logic ZeroSgn; // sign used when the sum is zero
-    // logic ResultSgnTmp;
-
-    // Determine the sign if the sum is zero
-    //      if the product and Z signs differ (cancellation), with no underflow and not a multiply: negative only when FrmM[1:0] == 2'b10 (round to -infinity)
-    //      if multiply then PSgnM
-    //      otherwise PSgnM
-    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
-
-    // is the result negative
-    //  if p - z is the Sum negative
-    //  if -p + z is the Sum positive
-    //  if -p - z then the Sum is negative
-    assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
-    assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; // a zero sum takes ZeroSgn, everything else ResultSgnTmp
-
-endmodule
-
-
-module normalize( // shifts the sum by the LZA count, applies the LZA correction, and produces the sum exponent, sticky bits and denormal indication
-    input logic  [3*`NF+5:0]            SumM,       // the positive sum
-    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
-    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
-    input logic  [$clog2(3*`NF+7)-1:0]  NormCntM,   // normalization shift count
-    input logic  [`FMTBITS-1:0]         FmtM,       // precision 1 = double 0 = single
-    input logic                         KillProdM,  // is the product set to zero
-    input logic 			            ZDenormM, // is Z denormalized
-    input logic                         AddendStickyM,  // the sticky bit calculated from the aligned addend
-    output logic [`NF+1:0]              NormSum,        // normalized sum
-    output logic                        SumZero,        // is the sum zero
-    output logic                        NormSumSticky, UfSticky,    // sticky bits
-    output logic [`NE+1:0]              SumExp,         // exponent of the normalized sum
-    output logic                        ResultDenorm    // is the result denormalized
-);
-    logic [`NE+1:0]             SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
-    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
-    logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
-    logic [3*`NF+8:0]           SumShifted;         // the shifted sum before LZA correction
-    logic [`NE+1:0]             SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
-    logic                       PreResultDenorm;    // is the result denormalized - calculated before LZA correction
-    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Normalization
-    ///////////////////////////////////////////////////////////////////////////////
-    //*** insert bias-bias simplification in fcvt.sv/phone pictures
-    // Determine if the sum is zero
-    assign SumZero = ~(|SumM);
-
-    // calculate the sum's exponent
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
-
-    //convert the sum's exponent into the proper precision
-    if (`FPSIZES == 1) begin
-        assign SumExpTmp = SumExpTmpTmp;
-
-    end else if (`FPSIZES == 2) begin
-        assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: SumExpTmp = SumExpTmpTmp;
-                `FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
-                `FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
-                default: SumExpTmp = `NE+2'bx; // NOTE(review): parses as `NE + 2'bx, not an (`NE+2)-bit x literal - confirm intent
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin
-        always_comb begin
-            case (FmtM)
-                2'h3: SumExpTmp = SumExpTmpTmp;
-                2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
-                2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
-                2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
-            endcase
-        end
-
-    end
-    
-    // determine if the result is denormalized
-    
-    if (`FPSIZES == 1) begin
-        logic Sum0LEZ, Sum0GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
-
-    end else if (`FPSIZES == 2) begin
-        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
-        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
-        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
-
-    end else if (`FPSIZES == 3) begin
-        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
-        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
-        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
-        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
-        always_comb begin
-            case (FmtM)
-                `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
-                `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
-                `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
-                default: PreResultDenorm = 1'bx;
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin
-        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF  )-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
-        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
-        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
-        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
-        assign Sum3LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
-        assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
-        always_comb begin
-            case (FmtM)
-                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
-                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
-                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
-                2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
-            endcase
-        end
-
-    end
-
-    // 010. when should be 001.
-    //      - shift left one
-    //      - add one from exp
-    //      - if kill prod dont add to exp
-
-    // Determine if the result is denormal
-    // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
-
-    // Determine the shift needed for denormal results
-    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
-    // Normalize the sum
-    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
-    // LZA correction
-    assign LZAPlus1 = SumShifted[3*`NF+7];
-    assign LZAPlus2 = SumShifted[3*`NF+8];
-	// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
-    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
-    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
-
-    // Calculate the sticky bit
-    if (`FPSIZES == 1) begin
-        assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
-
-    end else if (`FPSIZES == 2) begin
-        // 3*NF+5 - NF1 - 3
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
-        (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
-
-    end else if (`FPSIZES == 3) begin
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
-        (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) | 
-        (|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
-
-    end else if (`FPSIZES == 4) begin        
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
-        (|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | 
-        (|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
-        (|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
-
-    end
-
-    assign UfSticky = AddendStickyM | NormSumSticky;
-
-    // Determine sum's exponent
-    //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
-    // recalculate if the result is denormalized
-    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
-
-endmodule
-
-module fmaround( // extracts the rounding bits for the selected format, decides whether to add or subtract one, and adds RoundAdd to {SumExp, fraction}
-    input logic  [`FMTBITS-1:0] FmtM,       // precision 1 = double 0 = single
-    input logic  [2:0]          FrmM,       // rounding mode
-    input logic                 UfSticky,   // sticky bit for underflow calculation
-    input logic  [`NF+1:0]      NormSum,    // normalized sum
-    input logic                 AddendStickyM,  // addend's sticky bit
-    input logic                 NormSumSticky,  // normalized sum's sticky bit
-    input logic                 ZZeroM,         // is Z zero
-    input logic                 InvZM,          // invert Z
-    input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
-    input logic                 ResultSgnTmp,      // the result's sign
-    output logic                CalcPlus1, UfPlus1,  // do you add or subtract one from the result
-    output logic [`NE+1:0]      FullResultExp,      // ResultExp with bits to determine sign and overflow
-    output logic [`NF-1:0]      ResultFrac,         // Result fraction
-    output logic [`NE-1:0]      ResultExp,          // Result exponent
-    output logic                Sticky,             // sticky bit
-    output logic [`FLEN:0]      RoundAdd,           // how much to add to the result
-    output logic                Round, Guard, UfLSBNormSum // bits needed to calculate rounding - NOTE(review): no assignment to Guard is visible in this module
-);
-    logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
-    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic           UfGuard;            // guard bit used to calculate underflow - NOTE(review): not referenced elsewhere in this module
-    logic           UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract one from the result
-    logic [`NF-1:0] NormSumTruncated;   // the normalized sum trimmed to fit the mantissa
-    logic           UfRound;            // round bit used to determine the underflow flag
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Rounding
-    ///////////////////////////////////////////////////////////////////////////////
-
-    // round to nearest even
-    //      {Guard, Round, Sticky}
-    //      0xx - do nothing
-    //      100 - tie - Plus1 if result is odd  (LSBNormSum = 1)
-    //          - don't add 1 if a small number was supposed to be subtracted
-    //      101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
-    //      110/111 - Plus1
-
-    //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
-
-    //  round to -infinity
-    //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
-    //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
-
-    //  round to infinity
-    //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
-    //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
-
-    //  round to nearest max magnitude
-    //      {Guard, Round, Sticky}
-    //      0xx - do nothing
-    //      100 - tie - Plus1
-    //          - don't add 1 if a small number was supposed to be subtracted
-    //      101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
-    //      110/111 - Plus1
-
-    if (`FPSIZES == 1) begin
-        // determine guard, round, and least significant bit of the result
-        assign Round = NormSum[1];
-        assign LSBNormSum = NormSum[2];
-
-        // used to determine underflow flag
-        assign UfRound = NormSum[0];
-
-    end else if (`FPSIZES == 2) begin
-        //         \/-------------NF---------------,
-        //      |      NF1       | 2 |             |
-        //          '-------NF1------^
-
-        // determine guard, round, and least significant bit of the result
-        assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
-        assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
-
-        // used to determine underflow flag
-        assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
-
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: begin
-                    // determine guard, round, and least significant bit of the result
-                    Round = NormSum[1];
-                    LSBNormSum = NormSum[2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[0];
-                end
-                `FMT1: begin
-                    // determine guard, round, and least significant bit of the result
-                    Round = NormSum[`NF-`NF1+1];
-                    LSBNormSum = NormSum[`NF-`NF1+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`NF1];
-                end
-                `FMT2: begin
-                    // determine guard, round, and least significant bit of the result
-                    Round = NormSum[`NF-`NF2+1];
-                    LSBNormSum = NormSum[`NF-`NF2+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`NF2];
-                end
-                default: begin
-                    Round = 1'bx;
-                    LSBNormSum = 1'bx;
-                    UfRound = 1'bx;
-                end
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin
-        always_comb begin
-            case (FmtM)
-                2'h3: begin
-                    // determine guard, round, and least significant bit of the result
-                    Round = NormSum[1];
-                    LSBNormSum = NormSum[2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[0];
-                end
-                2'h1: begin
-                    // determine guard, round, and least significant bit of the result
-                    Round = NormSum[`NF-`D_NF+1];
-                    LSBNormSum = NormSum[`NF-`D_NF+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`D_NF];
-                end
-                2'h0: begin
-                    // determine guard, round, and least significant bit of the result
-                    Round = NormSum[`NF-`S_NF+1];
-                    LSBNormSum = NormSum[`NF-`S_NF+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`S_NF];
-                end
-                2'h2: begin
-                    // determine guard, round, and least significant bit of the result
-                    Round = NormSum[`NF-`H_NF+1];
-                    LSBNormSum = NormSum[`NF-`H_NF+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`H_NF];
-                end
-            endcase
-        end
-
-    end
-    // used to determine underflow flag
-    assign UfLSBNormSum = Round;
-    // determine sticky
-    assign Sticky = UfSticky | UfRound;
-
-
-    // Determine if a small number was supposed to be subtracted
-    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
-    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
-
-    always_comb begin
-        // Determine if you add 1
-        case (FrmM)
-            3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
-            3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
-            3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
-            3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
-            default: CalcPlus1 = 1'bx;
-        endcase
-        // Determine if you add 1 (for underflow flag)
-        case (FrmM)
-            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
-            3'b001: UfCalcPlus1 = 0;//round to zero
-            3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
-            3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
-            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
-            default: UfCalcPlus1 = 1'bx;
-        endcase
-        // Determine if you subtract 1
-        case (FrmM)
-            3'b000: CalcMinus1 = 0;//round to nearest even
-            3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
-            3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
-            3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
-            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
-            default: CalcMinus1 = 1'bx;
-        endcase
-   
-    end
-
-    // If an answer is exact don't round
-    assign Plus1 = CalcPlus1 & (Sticky | Round);
-    assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
-    assign Minus1 = CalcMinus1 & (Sticky | Round);
-
-    // Compute rounded result
-    if (`FPSIZES == 1) begin
-        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
-
-    end else if (`FPSIZES == 2) begin
-        // \/FLEN+1
-        //  | NE+2 |        NF      |
-        //  '-NE+2-^----NF1----^
-        // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
-        assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
-                                Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
-                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
-                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
-                default: RoundAdd = (`FLEN+1)'(0);
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb begin
-            case (FmtM)
-                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
-                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
-                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
-                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
-            endcase
-        end
-
-    end
-
-    assign NormSumTruncated = NormSum[`NF+1:2]; // drop the two low bits of the normalized sum
-    assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; // a carry out of the fraction increments the exponent
-    assign ResultExp = FullResultExp[`NE-1:0];
-
-
-endmodule
-
-module fmaflags( // computes the invalid, overflow, underflow and inexact conditions and packs them into FMAFlgM
-    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XZeroM, YZeroM,         // inputs are zero
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic  [`NE+1:0]      FullResultExp,          // ResultExp with bits to determine sign and overflow
-    input logic  [`NE+1:0]      SumExp,                 // exponent of the normalized sum
-    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
-    input logic                 Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
-    input logic  [`FMTBITS-1:0] FmtM,                   // precision 1 = double 0 = single
-    output logic                Invalid, Overflow, Underflow, // flags used to select the result
-    output logic [4:0]          FMAFlgM // FMA flags
-);
-    logic               SigNaN;     // is an input a signaling NaN
-    logic               GtMaxExp;   // is exponent greater than the maximum
-    logic               UnderflowFlag, Inexact; // flags
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Flags
-    ///////////////////////////////////////////////////////////////////////////////
-
-
-
-    // Set Invalid flag for following cases:
-    //   1) any input is a signaling NaN
-    //   2) Inf - Inf (unless x or y is NaN)
-    //   3) 0 * Inf
-
-    assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
-    assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
-   
-    // Set Overflow flag if the number is too big to be represented
-    //      - Don't set the overflow flag if an overflowed result isn't output
-    if (`FPSIZES == 1) begin
-        assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
-
-    end else if (`FPSIZES == 2) begin
-        assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
-                `FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
-                `FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
-                default: GtMaxExp = 1'bx;
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb begin
-            case (FmtM)
-                2'h3: GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
-                2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
-                2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
-                2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
-            endcase
-        end
-
-    end
-    assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-
-    // Set Underflow flag if the number is too small to be represented in normal numbers
-    //      - Don't set the underflow flag if the result is exact
-
-    assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-    //                      exp is negative         result is denorm        exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
-    assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-    // Set Inexact flag if the result is different from what would be output given infinite precision
-    //      - Don't set the underflow flag if an underflowed result isn't output (NOTE(review): this bullet talks about underflow but sits above the Inexact assignment)
-    assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-
-    // Combine flags
-    //      - FMA can't set the Divide by zero flag
-    //      - Don't set the underflow flag if the result was rounded up to a normal number
-    assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
-
-endmodule
-
-
-module resultselect(  // builds each candidate FMA result in the selected format, then picks FMAResM by priority
-    input logic                     XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]       ZExpM, // Z's exponent
-    input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]           FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic     [`FMTBITS-1:0]  FmtM,       // format select - encoding depends on FPSIZES (see the generate branches below)
-    input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                     ZDenormM, // is the original precision denormalized
-    input logic 		            ZZeroM, // is Z zero
-    input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                     PSgnM,      // the product's sign
-    input logic                     ResultSgn,  // the result's sign
-    input logic                     CalcPlus1,  // rounding bits
-    input logic     [`FLEN:0]       RoundAdd,   // how much to add to the result
-    input logic                     Invalid, Overflow, Underflow,  // flags
-    input logic                     ResultDenorm,       // is the result denormalized
-    input logic     [`NE-1:0]       ResultExp,          // Result exponent
-    input logic     [`NF-1:0]       ResultFrac,         // Result fraction
-    output logic    [`FLEN-1:0]     FMAResM     // FMA final result
-);
-    logic               InfSgn; // sign used for an infinite result
-    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
-    assign InfSgn = ZInfM ? ZSgnEffM : PSgnM; // Z's effective sign if Z is infinite, otherwise the product's sign
-    if (`FPSIZES == 1) begin // one supported format
-        if(`IEEE754) begin // propagate the NaN input's sign and lower fraction bits with the fraction MSB set
-            assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-            assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-            assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
-            assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end else begin // one fixed NaN pattern; only XNaNResult is driven, and only it is read by the non-IEEE754 mux at the end
-            assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end
-        assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                    {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; // round towards zero, round down with a positive result, or round up with a negative result give the largest finite value; otherwise infinity
-        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; // Z's exponent/fraction (exponent LSB cleared when Z is denormal or zero) plus RoundAdd when the addend sticky bit is set
-        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
-        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
-        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
-
-    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
-        if(`IEEE754) begin // results in the smaller format have their upper FLEN-LEN1 bits filled with ones
-            assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
-            assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
-            assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
-            assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end else begin 
-            assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end
-        
-        assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                            {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                        ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
-                                                                                                                            {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
-        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: begin  // full-width format (NE/NF)
-                    if(`IEEE754) begin
-                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
-                        InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
-                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
-                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
-                    NormResult = {ResultSgn, ResultExp, ResultFrac};
-                end
-                `FMT1: begin  // format using NE1/NF1/LEN1; upper FLEN-LEN1 bits filled with ones
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
-                        YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
-                        ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
-                        InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
-                                                                                                                                  {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
-                end
-                `FMT2: begin  // format using NE2/NF2/LEN2; upper FLEN-LEN2 bits filled with ones
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
-                        YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
-                        ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
-                        InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
-                                                                                                                                  {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
-                end
-                default: begin // any other FmtM encoding: drive every candidate to zero
-                    if(`IEEE754) begin
-                        XNaNResult = (`FLEN)'(0);
-                        YNaNResult = (`FLEN)'(0);
-                        ZNaNResult = (`FLEN)'(0);
-                        InvalidResult = (`FLEN)'(0);
-                    end else begin 
-                        XNaNResult = (`FLEN)'(0);
-                    end
-                    OverflowResult = (`FLEN)'(0);
-                    KillProdResult = (`FLEN)'(0);
-                    UnderflowResult = (`FLEN)'(0);
-                    InfResult = (`FLEN)'(0);
-                    NormResult = (`FLEN)'(0);
-                end
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin 
-        always_comb begin
-            case (FmtM)
-                2'h3: begin  // full-width format (NE/NF)
-                    if(`IEEE754) begin
-                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
-                        InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; // NOTE(review): uses Q_NE here where the other branches use NE - presumably equal when FPSIZES == 4; confirm
-                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
-                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
-                    NormResult = {ResultSgn, ResultExp, ResultFrac};
-                end
-                2'h1: begin  // format using the D_* parameters (presumably double precision)
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
-                        YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
-                        ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
-                        InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
-                                                                                                                                  {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
-                end
-                2'h0: begin  // format using the S_* parameters (presumably single precision)
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
-                        YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
-                        ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
-                        InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
-                                                                                                                                  {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
-                end
-                2'h2: begin  // format using the H_* parameters (presumably half precision)
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
-                        YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
-                        ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
-                        InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
-                                                                                                              {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
-
-                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
-                    NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
-                end
-            endcase
-        end
-
-    end
-    if(`IEEE754) begin // priority: X NaN, Y NaN, Z NaN, invalid, infinite input, killed product, overflow, underflow, normal result
-        assign FMAResM = XNaNM ? XNaNResult :
-                            YNaNM ? YNaNResult :
-                            ZNaNM ? ZNaNResult :
-                            Invalid ? InvalidResult :
-                            XInfM|YInfM|ZInfM ? InfResult :
-                            KillProdM ? KillProdResult :  
-                            Overflow ? OverflowResult :
-                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                            NormResult;
-    end else begin // same priority, but any NaN input or invalid operation returns the single XNaNResult pattern
-        assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
-                            XInfM|YInfM|ZInfM ? InfResult :
-                            KillProdM ? KillProdResult :  
-                            Overflow ? OverflowResult :
-                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                            NormResult;
-    end
-
-endmodule
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@ -0,0 +1,127 @@
+`include "wally-config.vh"
+
+module fmashiftcalc(  // computes the normalized sum's exponent, the pre-correction denormal flag, and the shifter input/amount for the FMA sum
+    input logic  [3*`NF+5:0]            SumM,       // the positive sum
+    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
+    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
+    input logic  [$clog2(3*`NF+7)-1:0]  FmaNormCntM,   // normalization shift count
+    input logic  [`FMTBITS-1:0]         FmtM,       // format select - encoding depends on FPSIZES (see the generate branches below)
+    input logic                         KillProdM,  // is the product set to zero
+    input logic 			            ZDenormM,   // is Z denormalized
+    output logic [`NE+1:0]              ConvNormSumExp,          // exponent of the normalized sum (rebiased to the selected format) not taking into account denormal or zero results
+    output logic                        SumZero,    // is the sum zero
+    output logic                        PreResultDenorm,    // is the result denormalized - calculated before LZA correction
+    output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // total shift amount: FmaNormCntM plus DenormShift
+    output logic [3*`NF+8:0]            FmaShiftIn        // shifter input: SumM zero-extended by three bits
+);
+    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
+    logic [`NE+1:0]             NormSumExp;       // the exponent of the normalized sum with the `FLEN bias
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Normalization
+    ///////////////////////////////////////////////////////////////////////////////
+    //*** insert bias-bias simplification in fcvt.sv/phone pictures
+    // Determine if the sum is zero
+    assign SumZero = ~(|SumM);
+
+    // calculate the sum's exponent
+    //      - killed product: Z's exponent with the LSB cleared when Z is denormal; otherwise ProdExpM - FmaNormCntM - 1 + (NF+4)
+    assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4);
+
+    // convert the sum's exponent into the proper precision (rebias; the result is forced to zero when NormSumExp is zero)
+    if (`FPSIZES == 1) begin
+        assign ConvNormSumExp = NormSumExp;
+
+    end else if (`FPSIZES == 2) begin
+        assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb begin
+            case (FmtM)
+                `FMT: ConvNormSumExp = NormSumExp;
+                `FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+                `FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
+                default: ConvNormSumExp = {`NE+2{1'bx}};
+            endcase
+        end
+
+    end else if (`FPSIZES == 4) begin
+        always_comb begin
+            case (FmtM)
+                2'h3: ConvNormSumExp = NormSumExp;
+                2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
+                2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
+                2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
+            endcase
+        end
+
+    end
+    
+    // determine if the result is denormalized
+    //      - SumNLEZ: exponent is <= 0 in format N; SumNGEFL: exponent is >= -(fraction length + 2) in format N (smaller formats also accept NormSumExp == 0)
+    if (`FPSIZES == 1) begin
+        logic Sum0LEZ, Sum0GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+
+    end else if (`FPSIZES == 2) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
+        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
+
+    end else if (`FPSIZES == 3) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
+        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
+        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
+        always_comb begin
+            case (FmtM)
+                `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+                `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
+                `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
+                default: PreResultDenorm = 1'bx;
+            endcase
+        end
+
+    end else if (`FPSIZES == 4) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF  )-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
+        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
+        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
+        assign Sum3LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
+        assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
+        always_comb begin
+            case (FmtM)
+                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
+                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
+                2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
+            endcase
+        end
+
+    end
+
+    // 010. when should be 001.
+    //      - shift left one
+    //      - add one from exp
+    //      - if kill prod dont add to exp
+
+    // Determine if the result is denormal
+    // assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero;
+
+    // Determine the shift needed for denormal results
+    //  - if not denorm add 1 to shift out the leading 1
+    assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
+    // set and calculate the shift input and amount
+    assign FmaShiftIn = {3'b0, SumM};
+    assign FmaShiftAmt = FmaNormCntM+DenormShift;
+endmodule
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -45,17 +45,13 @@ module fpu (
  output logic 		   FWriteIntE, // integer register write enables
  output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
  output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
+  output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
+  output logic [1:0]       FResSelW,
  output logic 		   FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
  output logic 		   IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
  output logic [4:0] 	   SetFflagsM        // FPU flags (to privileged unit)
  );

-   //*** make everything FLEN at some point
-   //*** add the 128 bit support to the if statement when needed
-   //*** make new tests for fp using testfloat that include flag checking and all rounding modes
-   //*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
-   //*** only fma/mul and fp <-> int convert flags have been tested. test the others.
-
   // FPU specifics:
   //    - uses NaN-blocking format
   //        - if there are any unsused bits the most significant bits are filled with 1s
@ -68,24 +64,24 @@ module fpu (
   logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW;             // FP precision 0-single 1-double
   logic 		  FDivStartD, FDivStartE;             // Start division or squareroot
   logic 		  FWriteIntD;                         // Write to integer register
+   logic 		  FWriteIntM;                         // Write to integer register
   logic [1:0] 	  FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
-   logic [1:0] 	  FResultSelD, FResultSelE;           // Select the result written to FP register
-   logic [1:0] 	  FResultSelM, FResultSelW;           // Select the result written to FP register
-   logic [2:0] 	  FOpCtrlD, FOpCtrlE;       // Select which opperation to do in each component
-   logic [1:0] 	  FResSelD, FResSelE;       // Select one of the results that finish in the memory stage
-   logic [1:0] 	  FIntResSelD, FIntResSelE;           // Select the result written to the integer resister
+   logic [2:0] 	  FOpCtrlD, FOpCtrlE, FOpCtrlM;       // Select which opperation to do in each component
+   logic [1:0] 	  FResSelD, FResSelE, FResSelM;       // Select one of the results that finish in the memory stage
+   logic [1:0] 	  PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
   logic [4:0] 	  Adr1E, Adr2E, Adr3E;                // adresses of each input

   // regfile signals
   logic [`FLEN-1:0] 	  FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
   logic [`FLEN-1:0] 	  FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
   logic [`FLEN-1:0] 	  FSrcXE;                             // Input 1 to the various units (after forwarding)
+   logic [`XLEN-1:0] 	  IntSrcXE;                             // Input 1 to the various units (after forwarding)
   logic [`FLEN-1:0] 	  FPreSrcYE, FSrcYE;                  // Input 2 to the various units (after forwarding)
   logic [`FLEN-1:0] 	  FPreSrcZE, FSrcZE;                  // Input 3 to the various units (after forwarding)

   // unpacking signals
   logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
-   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
+   logic 		  XSgnM;                       // input's sign - memory stage
   logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
   logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
   logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
@ -95,7 +91,7 @@ module fpu (
   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
   logic 		  XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
   logic 		  XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
-   logic 		  XDenormE, ZDenormE;       // is the input denormalized
+   logic 		  XDenormE, ZDenormE, ZDenormM;       // is the input denormalized
   logic 		  XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
   logic 		  XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
   logic 		  XZeroQ, YZeroQ;                     // is the input zero - divide
@ -104,24 +100,43 @@ module fpu (
   logic 		  XInfQ, YInfQ;                       // is the input infinity - divide
   logic 		  XExpMaxE;                           // is the exponent all ones (max value)
   logic 		  FmtQ;
-   logic 		  FOpCtrlQ;     
+   logic 		  FOpCtrlQ;   
+
+   // Fma Signals
+    logic [3*`NF+5:0]	SumE, SumM;                       
+    logic [`NE+1:0]	    ProdExpE, ProdExpM;
+    logic 			    AddendStickyE, AddendStickyM;
+    logic 			    KillProdE, KillProdM;
+    logic 			    InvZE, InvZM;
+    logic 			    NegSumE, NegSumM;
+    logic 			    ZSgnEffE, ZSgnEffM;
+    logic 			    PSgnE, PSgnM;
+    logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;
+
+   // Cvt Signals
+    logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
+	 logic [`LOGLGLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
+    logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
+    logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
+    logic                   IntZeroE, IntZeroM;      // is the integer zero?
+    logic [`LGLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)

   // result and flag signals
   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
   logic [4:0] 	  FDivFlgM;                 // divide/squareroot flags  
-   logic [`FLEN-1:0] 	  FMAResM, FMAResW;                   // FMA/multiply result
-   logic [4:0] 	  FMAFlgM;                   // FMA/multiply result	
   logic [`FLEN-1:0] 	  ReadResW;                           // read result (load instruction)
-   logic [`FLEN-1:0] 	  CvtResE;                   // FP <-> int convert result
-   logic [`XLEN-1:0] CvtIntResE;                   // FP <-> int convert result
-   logic [4:0] 	  CvtFlgE;                   // FP <-> int convert flags //*** trim this	
   logic [`XLEN-1:0] 	  ClassResE;               // classify result
-   logic [`FLEN-1:0] 	  CmpResE;                   // compare result
-   logic 		  CmpNVE;                     // compare invalid flag (Not Valid)     
+   logic [`XLEN-1:0] 	  FIntResE;               // classify result
+   logic [`FLEN-1:0] 	  FpResM, FpResW;               // classify result
+   logic [`FLEN-1:0] 	  PostProcResM;               // classify result
+   logic [4:0] 	  PostProcFlgM;               // classify result
+   logic [`XLEN-1:0] FCvtIntResM; 
+   logic [`FLEN-1:0] 	  CmpFpResE;                   // compare result
+   logic [`XLEN-1:0] 	  CmpIntResE;                   // compare result
+   logic 		           CmpNVE;                     // compare invalid flag (Not Valid)     
   logic [`FLEN-1:0] 	  SgnResE;                   // sign injection result
-   logic [`FLEN-1:0] 	  FResE, FResM, FResW;                // selected result that is ready in the memory stage
-   logic [4:0] 	  FFlgE, FFlgM;                       // selected flag that is ready in the memory stage     
-   logic [`XLEN-1:0] 	  FIntResE;     
+   logic [`FLEN-1:0] 	  PreFpResE, PreFpResM, PreFpResW;                // selected result that is ready in the memory stage
+   logic  	        PreNVE, PreNVM;                       // selected flag that is ready in the memory stage     
   logic [`FLEN-1:0] 	  FPUResultW;                         // final FP result being written to the FP register     
   // other signals
   logic 		  FDivSqrtDoneE;                      // is divide done
@ -133,10 +148,20 @@ module fpu (
   
   // DECODE STAGE

+   //////////////////////////////////////////////////////////////////////////////////////////
+   //          |||||||||||
+   //          |||      |||
+   //          |||       |||
+   //          |||       |||
+   //          |||       |||
+   //          |||      |||
+   //          |||||||||||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
   // calculate FP control signals
   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
-      .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, 
-      .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
+      .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, 
+      .FmtD, .FrmD, .FWriteIntD);

   // FP register file
   fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -150,20 +175,31 @@ module fpu (
   flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                           {Adr1E, Adr2E, Adr3E});
-   flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE, 
-               {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
-               {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
+   flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+               {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
+               {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});

   // EXECUTION STAGE
+   
+   //////////////////////////////////////////////////////////////////////////////////////////
+   //          ||||||||||||
+   //          |||
+   //          |||       
+   //          |||||||||
+   //          |||     
+   //          |||      
+   //          ||||||||||||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
   // Hazard unit for FPU  
   //    - determines if any forwarding or stalls are needed
-   fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, 
+   fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, 
                  .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);

   // forwarding muxs
-   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
-   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
-   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
+   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
+   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
+   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);


   generate
@ -178,7 +214,7 @@ module fpu (
   endgenerate


-   mux2  #(`FLEN)  fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
+   mux2  #(`FLEN)  fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
   
   // Force Z to be 0 for multiply instructions 
   generate
@ -201,21 +237,12 @@ module fpu (
         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, 
         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
-
-   // FMA
-   //   - two stage FMA
-   //   - execute stage - multiplication and addend shifting
-   //   - memory stage  - addition and rounding
-   //   - handles FMA and multiply instructions
-   fma fma (.clk, .reset, .FlushM, .StallM, 
-      .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-      .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
-      .XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, 
-      .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
-      .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-      .FOpCtrlE,
-      .FmtE, .FmtM, .FrmM, 
-      .FMAFlgM, .FMAResM);
+   
+   // fma - does multiply, add, and multiply-add instructions 
+   fma fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, 
+            .XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, 
+            .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE, 
+            .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); 

   // fpdivsqrt using Goldschmidt's iteration
   if(`FLEN == 64) begin 
@ -245,11 +272,14 @@ module fpu (

   // other FP execution units
   fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
-            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
+            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
   fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
   fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
-   fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
-              .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
+
+   fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, 
+              .FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE, 
+              .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .IntZeroE, 
+              .CvtLzcInE);

   // data to be stored in memory - to IEU
   //    - FP uses NaN-blocking format
@ -269,16 +299,16 @@ module fpu (
                             {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
   endgenerate
   // select a result that may be written to the FP register
-   mux4  #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
-   mux4  #(5)  FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
+   mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
+   assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);

   // select the result that may be written to the integer register - to IEU
   if (`FLEN>`XLEN) 
-      mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE, 
-                  CvtIntResE, FIntResSelE, FIntResE);
+      assign IntSrcXE = FSrcXE[`XLEN-1:0];
   else 
-      mux4  #(`XLEN)  IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE, 
-                  CvtIntResE, FIntResSelE, FIntResE);
+      assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
+
+   mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
   // *** DH 5/25/22: CvtRes will move to mem stage.  Premux in execute to save area, then make sure stalls are ok
   // *** make sure the fpu matches the chapter diagram

@ -286,33 +316,68 @@ module fpu (

   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
-   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
+   flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
-   flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM, 
-            {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
-            {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});     
-   flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); 
-   flopenrc #(5)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);      
-   flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
-   flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
-               {FRegWriteE, FResultSelE, FrmE, FmtE},
-               {FRegWriteM, FResultSelM, FrmM, FmtM});
+   flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
+   flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
+   flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, 
+            {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
+            {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});     
+   flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
+   flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
+               {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
+               {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
+   flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
+   flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
+   flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, 
+                           {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
+                           {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
+   flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
+                           {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
+                           {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});

   // BEGIN MEMORY STAGE

+   //////////////////////////////////////////////////////////////////////////////////////////
+   //          |||         |||
+   //          ||||||   ||||||
+   //          ||| ||| ||| |||
+   //          |||  |||||  |||
+   //          |||   |||   |||
+   //          |||         |||
+   //          |||         |||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
+   postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+                           .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, 
+                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, 
+                           .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, 
+                           .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, 
+                           .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
+
   // FPU flag selection - to privileged
-   mux4  #(5)  FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
+   mux2  #(5)  FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
+   mux2  #(`FLEN)  FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM);

   // M/W pipe registers
-   flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
-   flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); 
-   flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
+   flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); 
+   flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
   flopenrc #(4+int'(`FMTBITS-1))  MWCtrlReg(clk, reset, FlushW, ~StallW,
-            {FRegWriteM, FResultSelM, FmtM},
-            {FRegWriteW, FResultSelW, FmtW});
+            {FRegWriteM, FResSelM, FmtM},
+            {FRegWriteW, FResSelW, FmtW});

   // BEGIN WRITEBACK STAGE

+   //////////////////////////////////////////////////////////////////////////////////////////
+   //         |||           |||
+   //         |||           |||
+   //         |||    |||    |||
+   //         |||   |||||   |||
+   //         |||  ||| |||  |||
+   //          ||||||   ||||||
+   //          |||         |||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
   // put ReadData into NaN-blocking format
   //    - if there are any unsused bits the most significant bits are filled with 1s
   //    - for load instruction
@ -328,6 +393,6 @@ module fpu (
   endgenerate

   // select the result to be written to the FP register
-   if(`FLEN>=64)
-   mux4  #(`FLEN)  FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
+   mux2  #(`FLEN)  FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
+
 endmodule // fpu
--- a/pipelined/src/fpu/fsgninj.sv
+++ b/pipelined/src/fpu/fsgninj.sv
@ -46,7 +46,7 @@ module fsgninj (
 	//
 	
 	// calculate the result's sign
-	assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);
+	assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
 	
 	// format final result based on precision
 	//    - uses NaN-blocking format
@ -64,7 +64,7 @@ module fsgninj (
                `FMT: SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
                `FMT1: SgnResE = {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]};
                `FMT2: SgnResE = {{`FLEN-`LEN2{1'b1}}, ResSgn, FSrcXE[`LEN2-2:0]};
-                default: SgnResE = 0;
+                default: SgnResE = {`FLEN{1'bx}};
            endcase

    else if (`FPSIZES == 4)
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@ -0,0 +1,29 @@
+`include "wally-config.vh"
+
+module lzacorrection(    // applies the LZA correction to the shifted value and computes the resulting sum exponent
+    input logic  [`NORMSHIFTSZ-1:0]     Shifted,         // the shifted sum before LZA correction
+    input logic                         FmaOp,    // is the operation an FMA - selects the LZA-corrected sum for CorrShifted
+    input logic  [`NE+1:0]              ConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic                         PreResultDenorm,    // is the result denormalized - calculated before LZA correction
+    input logic                         KillProdM,  // is the product set to zero
+    input logic                         SumZero,    // is the sum zero - forces SumExp to zero
+    output logic  [`CORRSHIFTSZ-1:0]    CorrShifted,         // FMA: the shifted sum after LZA correction, otherwise: the top bits of Shifted
+    output logic [`NE+1:0]              SumExp         // exponent of the normalized sum
+);
+    logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
+    logic                        ResDenorm;    // is the result denormalized
+    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
+
+    // LZA correction - the top two bits of Shifted indicate a plus two or plus one correction
+    assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
+    assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
+	// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
+    assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
+    assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    // Determine sum's exponent - forced to zero when the sum is zero or the result is denormalized
+    //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
+    assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
+    // recalculate if the result is denormalized
+    assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
+
+endmodule
--- a/pipelined/src/fpu/normshift.sv
+++ b/pipelined/src/fpu/normshift.sv
@ -0,0 +1,46 @@
+`include "wally-config.vh"
+
+
+ // convert shift
+    //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if necessary | << CalcExp
+    //          process:
+    //              - start - CalcExp = 1 + XExp - Largest Bias
+    //                  |  `XLEN  zeros     |     Mantissa      | 0's if necessary |
+    //
+    //              - shift left 1 (1)
+    //                  | `XLEN-1 zeros |bit|     frac      | 0's if necessary |
+    //                                      . <- binary point
+    //
+    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
+    //                  |  0's |     Mantissa      |      0's if necessary     |
+    //                  |     keep          |
+    //
+    //      fp -> fp:
+    //          - if result is denormalized or underflowed:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | << NF+CalcExp-1
+    //          process:
+    //             - start
+    //                 |     mantissa      | 0's |
+    //
+    //             - shift right by NF-1 (NF-1)
+    //                 |  `NF-1  zeros   |     mantissa      | 0's |
+    //
+    //             - shift left by CalcExp = XExp - Largest bias + new bias
+    //                 |   0's  |     mantissa      |     0's      |
+    //                 |       keep      |
+    //
+    //          - if the input is denormalized:
+    //              |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+    //
+    //      int -> fp: |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+
+module normshift(    // normalization shifter: left-shifts ShiftIn by ShiftAmt
+    input logic  [$clog2(`NORMSHIFTSZ)-1:0]      ShiftAmt,   // normalization shift count
+    input logic  [`NORMSHIFTSZ-1:0]              ShiftIn,        // the value to be shifted
+    output logic [`NORMSHIFTSZ-1:0]             Shifted        // the shifted result
+);
+    assign Shifted = ShiftIn << ShiftAmt;
+
+endmodule
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@ -0,0 +1,203 @@
+///////////////////////////////////////////
+//
+// Written: Katherine Parry, David Harris
+// Modified: 6/23/2021
+//
+// Purpose: Post-processing of floating-point results: normalization, rounding, sign, flags, and result selection
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module postprocess(
+    input logic                             XSgnM,  // input signs
+    input logic     [`NE-1:0]               ZExpM, // input exponents
+    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]                   FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [`FMTBITS-1:0]          FmtM,       // precision 1 = double 0 = single
+    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
+    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
+    input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
+    input logic     [3*`NF+5:0]             SumM,       // the positive sum
+    input logic                             NegSumM,    // was the sum negative
+    input logic                             InvZM,      // do you invert Z
+    input logic                             ZDenormM, // is the original precision denormalized
+    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                             PSgnM,      // the product's sign
+    input logic [2:0]                       FOpCtrlM,       // choose which operation (look below for values)
+    input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
+    input logic [`NE:0]           CvtCalcExpM,    // the calculated exponent
+    input logic CvtResDenormUfM,    // does the convert result underflow or is it denormalized
+	input logic [`LOGLGLEN-1:0] CvtShiftAmtM,  // how much to shift by
+    input logic                   CvtResSgnM,     // the result's sign
+    input logic             FWriteIntM,     // is fp->int (since it's writing to the integer register)
+    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic             IntZeroM,         // is the integer input zero
+    input logic [1:0] PostProcSelM, // select which operation is post-processed: 10 = fma, 00 = cvt, 01 = div
+    output logic    [`FLEN-1:0]    PostProcResM,    // post-processed floating-point result
+    output logic    [4:0]          PostProcFlgM,    // post-processed flags
+    output logic [`XLEN-1:0] FCvtIntResM    // the int conversion result
+    );
+   
+
+
+    logic [`NF-1:0]     ResFrac; // Result fraction
+    logic [`NE-1:0]     ResExp;  // Result exponent
+    logic  [`CORRSHIFTSZ-1:0]    CorrShifted;         // the shifted sum after LZA correction
+    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
+    logic [`NE+1:0]     FullResExp;  // ResExp with bits to determine sign and overflow
+    logic               SumZero;        // is the sum zero
+    logic               Sticky;           // Sticky bit
+    logic [3*`NF+8:0]            FmaShiftIn;        // FMA input to the normalization shifter
+    logic               UfPlus1;                    // do you add one (for determining underflow flag)
+    logic               Round;   // bits needed to determine rounding
+    logic [`LGLEN+`NF:0]    CvtShiftIn;    // number to be shifted
+    logic               Mult;       // multiply operation
+    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
+    logic [`NE+1:0]     ConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA correction
+    logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt;   // FMA normalization shift count
+    logic [$clog2(`NORMSHIFTSZ)-1:0]  ShiftAmt;   // normalization shift count
+    logic [`NORMSHIFTSZ-1:0]            ShiftIn;        // input to the normalization shifter
+    logic [`NORMSHIFTSZ-1:0]    Shifted;    // the shifted result
+    logic                   Plus1;      // add one to the final result?
+    logic                   IntInvalid, Overflow, Underflow, Invalid; // flags
+    logic                   Signed;     // is the operation with a signed integer?
+    logic                   Int64;      // is the integer 64 bits?
+    logic                   IntToFp;       // is the operation an int->fp conversion?
+    logic                   ToInt;      // is the operation an fp->int conversion?
+    logic [`NE+1:0] RoundExp;
+    logic [1:0] NegResMSBS;
+    logic CvtOp;    // is the operation a conversion (PostProcSelM == 00)
+    logic FmaOp;    // is the operation an fma (PostProcSelM == 10)
+    logic CvtResUf;
+    logic DivOp;    // is the operation a divide/square root (PostProcSelM == 01)
+    logic InfIn;    // is an infinite input being used
+    logic ResSgn;   // the result's sign
+    logic NaNIn;    // is a NaN input being used
+    logic UfLSBRes;
+    logic Sqrt;
+    logic [`FMTBITS-1:0] OutFmt;    // output format
+
+    // signals to help readability
+    assign Signed = FOpCtrlM[0];
+    assign Int64 =  FOpCtrlM[1];
+    assign IntToFp =   FOpCtrlM[2];
+    assign ToInt =  FWriteIntM;
+    assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
+    assign CvtOp = (PostProcSelM == 2'b00);
+    assign FmaOp = (PostProcSelM == 2'b10);
+    assign DivOp = (PostProcSelM == 2'b01);
+    assign Sqrt = FOpCtrlM[0];
+
+    // is there an input of infinity or NaN being used
+    assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
+    assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp);
+
+    // choose the output format depending on the operation
+    //      - fp -> fp: OpCtrl contains the precision of the output
+    //      - otherwise: FmtM contains the precision of the output
+    if (`FPSIZES == 2) 
+        assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT); 
+    else if (`FPSIZES == 3 | `FPSIZES == 4) 
+        assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0]; 
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Normalization
+    ///////////////////////////////////////////////////////////////////////////////
+
+    cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,  
+                              .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
+    fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
+                          .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+
+    always_comb
+        case(PostProcSelM)
+            2'b10: begin // fma
+                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
+                ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
+            end
+            2'b00: begin // cvt
+                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
+                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
+            end
+            2'b01: begin //div
+                ShiftAmt = 0;//{DivShiftAmt};
+                ShiftIn =  0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
+            end
+            default: begin 
+                ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; 
+                ShiftIn = {`NORMSHIFTSZ{1'bx}}; 
+            end
+        endcase
+    
+    normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+    lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
+                                .SumZero, .Shifted, .SumExp, .CorrShifted);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Rounding
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // round to nearest even
+    // round to zero
+    // round to -infinity
+    // round to infinity
+    // round to nearest max magnitude
+
+    round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
+                .InvZM, .ResSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt,  .CvtResUf,
+                .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Sign calculation
+    ///////////////////////////////////////////////////////////////////////////////
+
+    resultsign resultsign(.FrmM, .PSgnM, .PostProcSelM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
+                          .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, .CvtResSgnM, .ResSgn);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Flags
+    ///////////////////////////////////////////////////////////////////////////////
+
+    flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM, 
+                .XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
+                .XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid,
+                .UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
+                .RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Select the result
+    ///////////////////////////////////////////////////////////////////////////////
+
+    resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM, .IntInvalid,
+        .IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf, 
+        .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
+        .FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
+
+endmodule
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@ -0,0 +1,282 @@
+`include "wally-config.vh"
+
+module resultselect( // picks the final fp res (PostProcResM) and the final integer conversion res (FCvtIntResM) from the candidate results
+    input logic                     XSgnM,        // sign of the X input
+    input logic     [`NE-1:0]       ZExpM, // input exponents
+    input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]           FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [`FMTBITS-1:0]  OutFmt,       // output format
+    input logic                     InfIn,        // forces the overflow/infinity res (OfRes) to be selected
+    input logic                     XZeroM,       // fp input X is zero (kills the res of conversions that use the fp input)
+    input logic                     IntZeroM,     // integer input is zero (kills the res of conversions that use the int input)
+    input logic                     NaNIn,        // presumably "some input is NaN" - TODO confirm; only read when `IEEE754 is 0
+    input logic                     IntToFp,      // the conversion uses the integer input
+    input logic                     Int64,        // integer res is 64 bits (long) rather than 32 bits (int)
+    input logic                     Signed,       // integer res is signed
+    input logic                     CvtOp,        // conversion operation
+    input logic [`NORMSHIFTSZ-1:0]             Shifted,        // shifted value; its top XLEN bits are the magnitude of the integer res
+    input logic                     FmaOp,        // fma operation
+    input logic                     Plus1,        // rounding increment (add 1 to the res)
+    input logic [`NE:0]             CvtCalcExpM,    // the calculated exponent
+    input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                     ZDenormM, // is the original precision denormalized
+    input logic 		            ZZeroM,   // Z is zero
+    input logic                     ResSgn,  // the res's sign
+    input logic     [`FLEN:0]       RoundAdd,   // how much to add to the res
+    input logic                     IntInvalid, Invalid, Overflow,  // flags
+    input logic CvtResUf,                                // the conversion res underflows (kills the res)
+    input logic     [`NE-1:0]       ResExp,          // Res exponent
+    input logic     [`NE+1:0]       FullResExp,          // Res exponent with extra upper bits; bit `NE+1 kills the res of non-conversion operations
+    input logic     [`NF-1:0]       ResFrac,         // Res fraction
+    output logic    [`FLEN-1:0]     PostProcResM,     // final floating point res
+    output logic [1:0] NegResMSBS,                    // two bits of NegRes at the top of the selected integer width (position depends on Signed and Int64)
+    output logic    [`XLEN-1:0]     FCvtIntResM     // final integer res
+);
+    logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
+    logic OfResMax;                     // the overflow res is the maximum normalized number instead of infinity
+    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
+    logic [`XLEN+1:0]       NegRes;     // the negation of the result
+    logic KillRes;                      // select UfRes instead of NormRes
+
+
+    // does the overflow result output the maximum normalized floating point number (round towards zero, round down & positive res, round up & negative res)
+    //                output infinity if the input is infinity (unless it is an int to fp conversion)
+    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
+
+    if (`FPSIZES == 1) begin
+
+        //NaN res selection depending on standard
+        if(`IEEE754) begin
+            assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+            assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+            assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end else begin
+            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end
+
+        assign OfRes =  OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; // max normalized number or infinity
+        assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; // Z (exponent LSB cleared if Z is denorm or zero) plus RoundAdd when AddendStickyM is set
+        assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]}; // signed zero, plus 1 in the LSB when Plus1 and FrmM[1] are set
+        assign NormRes = {ResSgn, ResExp, ResFrac}; // the rounded res
+
+    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
+        if(`IEEE754) begin
+            assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+            assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+            assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end else begin 
+            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end
+        
+        assign OfRes =  OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+        assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
+        assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT: begin  // format described by `NE and `NF
+                    if(`IEEE754) begin
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+                    NormRes = {ResSgn, ResExp, ResFrac};
+                end
+                `FMT1: begin  // format described by `NE1, `NF1 and `LEN1; the upper `FLEN-`LEN1 bits are set to 1
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+                        YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+                        ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end
+                    OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+                    KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
+                end
+                `FMT2: begin  // format described by `NE2, `NF2 and `LEN2; the upper `FLEN-`LEN2 bits are set to 1
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
+                        YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
+                        ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
+                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
+                    KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
+                end
+                default: begin // any other OutFmt value: every candidate res is zero
+                    if(`IEEE754) begin
+                        XNaNRes = (`FLEN)'(0);
+                        YNaNRes = (`FLEN)'(0);
+                        ZNaNRes = (`FLEN)'(0);
+                        InvalidRes = (`FLEN)'(0);
+                    end else begin 
+                        InvalidRes = (`FLEN)'(0);
+                    end
+                    OfRes = (`FLEN)'(0);
+                    KillProdRes = (`FLEN)'(0);
+                    UfRes = (`FLEN)'(0);
+                    NormRes = (`FLEN)'(0);
+                end
+            endcase
+
+    end else if (`FPSIZES == 4) begin 
+        always_comb
+            case (OutFmt)
+                2'h3: begin  // format described by `NE and `NF (the exponent slice below uses `Q_NE)
+                    if(`IEEE754) begin
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+                    NormRes = {ResSgn, ResExp, ResFrac};
+                end
+                2'h1: begin  // format described by the `D_* macros; the upper `FLEN-`D_LEN bits are set to 1
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
+                        YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
+                        ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
+                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end
+                    OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                    KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
+                end
+                2'h0: begin  // format described by the `S_* macros; the upper `FLEN-`S_LEN bits are set to 1
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
+                        YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
+                        ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
+                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                    KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
+                end
+                2'h2: begin  // format described by the `H_* macros; the upper `FLEN-`H_LEN bits are set to 1
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
+                        YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
+                        ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
+                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
+
+                    KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
+                end
+            endcase
+
+    end
+
+    
+
+
+
+    // determine if you should kill the res - Cvt
+    //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
+    //      - dont set to zero if fp input is zero but not using the fp input
+    //      - dont set to zero if int input is zero but not using the int input
+    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
+
+    if(`IEEE754) begin
+        assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes : // X NaN has the highest priority (ignored for int to fp conversions)
+                         YNaNM&~CvtOp ? YNaNRes :                // Y NaN (ignored for conversions)
+                         ZNaNM&FmaOp ? ZNaNRes :                 // Z NaN (fma only)
+                         Invalid ? InvalidRes : 
+                         Overflow|InfIn ? OfRes :
+                         KillProdM&FmaOp ? KillProdRes : 
+                         KillRes ? UfRes :  
+                         NormRes;
+    end else begin
+        assign PostProcResM = NaNIn|Invalid ? InvalidRes :       // every NaN input produces the same InvalidRes value
+                         Overflow|InfIn ? OfRes :
+                         KillProdM&FmaOp ? KillProdRes :  
+                         KillRes ? UfRes :  
+                         NormRes;
+    end
+
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    //      |||||||||||   |||     |||   |||||||||||||
+    //          |||       ||||||  |||        |||
+    //          |||       ||| ||| |||        |||
+    //          |||       |||  ||||||        |||
+    //      |||||||||||   |||     |||        |||
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////        
+
+    // *** probably can optimize the negation
+    // select the overflow integer res
+    //      - negative infinity and out of range negative input
+    //                 |  int  |  long  |
+    //          signed | -2^31 | -2^63  |
+    //        unsigned |   0   |    0   |
+    //
+    //      - positive infinity and out of range positive input and NaNs
+    //                 |   int  |  long  |
+    //          signed | 2^31-1 | 2^63-1 |
+    //        unsigned | 2^32-1 | 2^64-1 |
+    //
+    //      other: 32 bit unsigned res should be sign extended as if it were a signed number
+    assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negative
+                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
+                               XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negative
+                                              {`XLEN{1'b1}};// unsigned positive
+    
+    // round and negate the positive res if needed
+    assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
+    
+    //*** false critical path probably
+    assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
+			              Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
+
+    // select the integer output
+    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow res
+    //      - if the input underflows
+    //          - if rounding and signed operation and negative input, output -1
+    //          - otherwise output a rounded 0
+    //      - otherwise output the normal res (trimmed and sign extended if necessary)
+    assign FCvtIntResM = IntInvalid ?  OfIntRes :
+			            CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
+                        Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
+endmodule
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@ -0,0 +1,50 @@
+`include "wally-config.vh"
+
+module resultsign(
+    input logic [2:0]     FrmM,              // rounding mode (only FrmM[1:0] is examined here)
+    input logic           PSgnM, ZSgnEffM,   // sign of P and effective sign of Z
+    input logic           InvZM,             // enables the two NegSumM-dependent sign terms
+    input logic           ZInfM,             // an infinite res takes Z's sign instead of P's
+    input logic           InfIn,             // the fma res sign is the infinity sign
+    input logic           NegSumM,           // the sum is negative
+    input logic [1:0]     PostProcSelM,      // 10 = fma, 00 = cvt, 01 = divide
+    input logic [`NE+1:0] SumExp,            // sum exponent; bit `NE+1 set counts as underflow
+    input logic           SumZero,           // the sum is zero
+    input logic           Mult,              // multiply: a zero sum keeps P's sign
+    input logic           Round,             // with Sticky: a zero SumExp and either bit set
+    input logic           Sticky,            //              also counts as underflow
+    input logic           CvtResSgnM,        // sign passed through for cvt
+    output logic          ResSgn             // the selected res sign
+);
+    logic SignsDiffer, RoundDown, ExpUnderflow, ExactCancel;  // helpers for the zero sum sign
+    logic ZeroSumSgn, InfResSgn, NonZeroSumSgn, FmaSgn;       // candidate fma signs
+
+    // sign of a zero sum
+    //      - opposite signs cancelling with no underflow and no multiply: 0, or 1 when FrmM[1:0] is 10 (round down)
+    //      - every other case: the sign of P
+    assign SignsDiffer  = PSgnM ^ ZSgnEffM;
+    assign RoundDown    = FrmM[1:0] == 2'b10;
+    assign ExpUnderflow = SumExp[`NE+1] | ((SumExp == 0) & (Round | Sticky));
+    assign ExactCancel  = SignsDiffer & ~ExpUnderflow & ~Mult;
+    assign ZeroSumSgn   = ExactCancel ? RoundDown : PSgnM;
+
+    // sign of a nonzero sum - negative when any of these terms is set
+    //      - InvZM and Z negative and the sum negative
+    //      - InvZM and P negative and the sum not negative, or both P and Z negative
+    assign NonZeroSumSgn = (InvZM & ZSgnEffM & NegSumM) | (InvZM & PSgnM & ~NegSumM) | (PSgnM & ZSgnEffM);
+
+    // sign of an infinite res: Z's sign when ZInfM is set, otherwise P's sign
+    assign InfResSgn = ZInfM ? ZSgnEffM : PSgnM;
+
+    // fma sign priority: infinity first, then a zero sum, then the ordinary sum sign
+    assign FmaSgn = InfIn ? InfResSgn : (SumZero ? ZeroSumSgn : NonZeroSumSgn);
+
+    // pick the sign that belongs to the selected post processing operation
+    always_comb
+        case (PostProcSelM)
+            2'b00:   ResSgn = CvtResSgnM;   // cvt
+            2'b01:   ResSgn = 1'b0;         // divide
+            2'b10:   ResSgn = FmaSgn;       // fma
+            default: ResSgn = 1'bx;         // unused select value
+        endcase
+endmodule
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@ -0,0 +1,316 @@
+`include "wally-config.vh"
+// what position is XLEN in?
+//  options: 
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
+
+module round(
+    input logic  [`FMTBITS-1:0] OutFmt,       // precision 1 = double 0 = single
+    input logic  [2:0]          FrmM,       // rounding mode
+    input logic                 FmaOp,
+    input logic [1:0] PostProcSelM,
+    input logic                 CvtResDenormUfM,
+    input logic                 ToInt,
+    input logic                 CvtOp,
+    input logic                 CvtResUf,
+    input logic [`CORRSHIFTSZ-1:0]  CorrShifted,
+    input logic                 AddendStickyM,  // addend's sticky bit
+    input logic                 ZZeroM,         // is Z zero
+    input logic                 InvZM,          // invert Z
+    input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
+    input logic                 ResSgn,      // the result's sign
+    input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
+    output logic                UfPlus1,  // do you add or subtract on from the result
+    output logic [`NE+1:0]      FullResExp,      // ResExp with bits to determine sign and overflow
+    output logic [`NF-1:0]      ResFrac,         // Result fraction
+    output logic [`NE-1:0]      ResExp,          // Result exponent
+    output logic                Sticky,             // sticky bit
+    output logic [`NE+1:0] RoundExp,
+    output logic Plus1,
+    output logic [`FLEN:0]      RoundAdd,           // how much to add to the result
+    output logic                Round, UfLSBRes // bits needed to calculate rounding
+);
+    logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
+    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
+    logic           UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
+    logic                 NormSumSticky;  // normalized sum's sticky bit
+    logic                 UfSticky;   // sticky bit for underlow calculation
+    logic [`NF-1:0] RoundFrac;
+    logic FpRes, IntRes;
+    logic           UfRound;
+    logic           FpRound, FpLSBRes, FpUfRound;
+    logic           CalcPlus1, FpPlus1;
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Rounding
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // round to nearest even
+    //      {Round, Sticky}
+    //      0x - do nothing
+    //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
+    //          - don't add 1 if a small number was supposed to be subtracted
+    //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+    //         - plus 1 otherwise
+
+    //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+    //  round to -infinity
+    //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+    //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+    //  round to infinity
+    //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+    //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+    //  round to nearest max magnitude
+    //      {Guard, Round, Sticky}
+    //      0x - do nothing
+    //      10 - tie - Plus1
+    //          - don't add 1 if a small number was supposed to be subtracted
+    //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+    //         - Plus 1 otherwise
+
+    assign IntRes = CvtOp & ToInt;
+    assign FpRes = ~IntRes;
+
+    // sticky bit calculation
+    if (`FPSIZES == 1) begin
+
+    //     1: XLEN > NF
+    //      |         XLEN          |
+    //      |    NF     |1|1|
+    //                     ^    ^ if floating point result
+    //                     ^ if not an FMA result
+        if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                 (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
+    //     2: NF > XLEN
+        if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
+                                                 (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+
+    end else if (`FPSIZES == 2) begin
+        // XLEN is either 64 or 32
+        // so half and single are always smaller then XLEN
+
+        // 1: XLEN > NF   > NF1
+        if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
+        // 2: NF   > XLEN > NF1
+        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+        // 3: NF   > NF1  > XLEN
+        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+
+    end else if (`FPSIZES == 3) begin
+        // 1: XLEN > NF   > NF1
+        if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
+        // 2: NF   > XLEN > NF1
+        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+        // 3: NF   > NF1  > XLEN
+        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+
+    end else if (`FPSIZES == 4) begin
+        // Quad precision will always be greater than XLEN
+        // 2: NF   > XLEN > NF1
+        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
+        // 3: NF   > NF1  > XLEN
+        // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
+        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
+
+    end
+    
+
+
+    // only add the Addend sticky if doing an FMA opperation
+    //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
+    assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
+    
+    // determine round and LSB of the rounded value
+    //      - underflow round bit is used to determint the underflow flag
+    if (`FPSIZES == 1) begin
+        assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
+        assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
+        assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
+
+    end else if (`FPSIZES == 2) begin
+        assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1];
+        assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1];
+        assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2];
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
+                end
+                `FMT1: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2];
+                end
+                `FMT2: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2];
+                end
+                default: begin
+                    FpRound = 1'bx;
+                    FpLSBRes = 1'bx;
+                    FpUfRound = 1'bx;
+                end
+            endcase
+    end else if (`FPSIZES == 4) begin
+        always_comb
+            case (OutFmt)
+                2'h3: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2];
+                end
+                2'h1: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2];
+                end
+                2'h0: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2];
+                end
+                2'h2: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2];
+                end
+            endcase
+    end
+
+    assign Round = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound;
+    assign LSBRes = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
+    assign UfRound = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
+
+    // used to determine underflow flag
+    assign UfLSBRes = FpRound;
+    // determine sticky
+    assign Sticky = UfSticky | UfRound;
+
+
+    // Determine if a small number was supposed to be subtracted - For Fma calculation only
+    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
+    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
+
+    always_comb begin
+        // Determine if you add 1
+        case (FrmM)
+            3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
+            3'b001: CalcPlus1 = 0;//round to zero
+            3'b010: CalcPlus1 = ResSgn & ~(SubBySmallNum & ~Round);//round down
+            3'b011: CalcPlus1 = ~ResSgn & ~(SubBySmallNum & ~Round);//round up
+            3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
+            default: CalcPlus1 = 1'bx;
+        endcase
+        // Determine if you add 1 (for underflow flag)
+        case (FrmM)
+            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
+            3'b001: UfCalcPlus1 = 0;//round to zero
+            3'b010: UfCalcPlus1 = ResSgn & ~(UfSubBySmallNum & ~UfRound);//round down
+            3'b011: UfCalcPlus1 = ~ResSgn & ~(UfSubBySmallNum & ~UfRound);//round up
+            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
+            default: UfCalcPlus1 = 1'bx;
+        endcase
+        // Determine if you subtract 1
+        case (FrmM)
+            3'b000: CalcMinus1 = 0;//round to nearest even
+            3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
+            3'b010: CalcMinus1 = ~ResSgn & ~Round & SubBySmallNum;//round down
+            3'b011: CalcMinus1 = ResSgn & ~Round & SubBySmallNum;//round up
+            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
+            default: CalcMinus1 = 1'bx;
+        endcase
+   
+    end
+
+    // If an answer is exact don't round
+    assign Plus1 = CalcPlus1 & (Sticky | Round);
+    assign FpPlus1 = Plus1&~(ToInt&CvtOp);
+    assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky
+    assign Minus1 = CalcMinus1 & (Sticky | Round);
+
+    // Compute rounded result
+    if (`FPSIZES == 1) begin
+        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
+
+    end else if (`FPSIZES == 2) begin
+        // \/FLEN+1
+        //  | NE+2 |        NF      |
+        //  '-NE+2-^----NF1----^
+        // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
+        assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
+                                   Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb begin
+            case (OutFmt)
+                `FMT:  RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
+                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
+                default: RoundAdd = (`FLEN+1)'(0);
+            endcase
+        end
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb begin
+            case (OutFmt)
+                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
+                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
+                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
+                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
+            endcase
+        end
+
+    end
+
+    // determine the result to be rounded
+    assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+    
+    always_comb
+        case(PostProcSelM)
+            2'b10: RoundExp = SumExp; // fma
+            2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
+            2'b01: RoundExp = 0; // divide
+            default: RoundExp = 0; 
+        endcase
+
+    // round the result
+    //      - if the fraction overflows one should be added to the exponent
+    assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd;
+    assign ResExp = FullResExp[`NE-1:0];
+
+
+endmodule
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@ -98,7 +98,7 @@ module unpackinput (
                `FMT:  BadNaNBox = 0;
                `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
                `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
-                default: BadNaNBox = 0;
+                default: BadNaNBox = 1'bx;
            endcase

        // extract the sign bit
@ -107,7 +107,7 @@ module unpackinput (
                `FMT:  Sgn = In[`FLEN-1];
                `FMT1: Sgn = In[`LEN1-1];
                `FMT2: Sgn = In[`LEN2-1];
-                default: Sgn = 0;
+                default: Sgn = 1'bx;
            endcase

        // extract the fraction
@ -116,7 +116,7 @@ module unpackinput (
                `FMT: Frac = In[`NF-1:0];
                `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
                `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
-                default: Frac = 0;
+                default: Frac = {`NF{1'bx}};
            endcase

        // is the exponent non-zero
@ -125,7 +125,7 @@ module unpackinput (
                `FMT:  ExpNonZero = |In[`FLEN-2:`NF];     // if input is largest precision (`FLEN - ie quad or double)
                `FMT1: ExpNonZero = |In[`LEN1-2:`NF1];  // if input is larger precsion (`LEN1 - double or single)
                `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
-                default: ExpNonZero = 0; 
+                default: ExpNonZero = 1'bx; 
            endcase
            
        // example double to single conversion:
@ -142,7 +142,7 @@ module unpackinput (
                `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
                `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
                `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; 
-                default: Exp = 0;
+                default: Exp = {`NE{1'bx}};
            endcase

        // is the exponent all 1's
@ -151,7 +151,7 @@ module unpackinput (
                `FMT:  ExpMax = &In[`FLEN-2:`NF];
                `FMT1: ExpMax = &In[`LEN1-2:`NF1];
                `FMT2: ExpMax = &In[`LEN2-2:`NF2];
-                default: ExpMax = 0;
+                default: ExpMax = 1'bx;
            endcase

    end else if (`FPSIZES == 4) begin      // if all precsisons are supported - quad, double, single, and half
--- a/pipelined/src/ieu/datapath.sv
+++ b/pipelined/src/ieu/datapath.sv
@ -61,6 +61,8 @@ module datapath (
 (* mark_debug = "true" *)  input  logic             RegWriteW, 
  input  logic             SquashSCW,
  input  logic [2:0]       ResultSrcW,
+  input logic [`XLEN-1:0]  FCvtIntResW,
+  input logic [1:0]        FResSelW,
  output logic [`XLEN-1:0] ReadDataW,
  // input  logic [`XLEN-1:0] PCLinkW,
  input  logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW, 
@ -120,14 +122,17 @@ module datapath (
  flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
  flopenrc #(5)     RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
  flopen #(`XLEN)   ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
-  mux5  #(`XLEN)    resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 

  // floating point interactions: fcvt, fp stores
  if (`F_SUPPORTED) begin:fpmux
+    logic [`XLEN-1:0] IFCvtResultW;
    mux2  #(`XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
    mux2  #(`XLEN)  writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
+    mux2  #(`XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
+    mux5  #(`XLEN)    resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 
  end else begin:fpmux
    assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
+    mux5  #(`XLEN)    resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 
  end

  // handle Store Conditional result if atomic extension supported
--- a/pipelined/src/ieu/ieu.sv
+++ b/pipelined/src/ieu/ieu.sv
@ -61,6 +61,8 @@ module ieu (

  // Writeback stage
  input logic [`XLEN-1:0]  CSRReadValW, ReadDataM, MDUResultW,
+  input logic [1:0]        FResSelW,
+  input logic [`XLEN-1:0]  FCvtIntResW,
  output logic [4:0]       RdW,
  output logic [`XLEN-1:0] ReadDataW,
  // input  logic [`XLEN-1:0] PCLinkW,
@ -105,8 +107,8 @@ module ieu (
    .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
    .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
    .FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, 
-    .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE,
-    .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW,
+    .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
+    .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
    .CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);             
  
  forward    fw(
--- a/pipelined/src/ifu/ifu.sv
+++ b/pipelined/src/ifu/ifu.sv
@ -38,9 +38,13 @@ module ifu (
 	// Bus interface
 (* mark_debug = "true" *)	input logic [`XLEN-1:0] 	IFUBusHRDATA,
 (* mark_debug = "true" *)	input logic 				IFUBusAck,
+(* mark_debug = "true" *)	input logic 				IFUBusInit,
 (* mark_debug = "true" *)	output logic [`PA_BITS-1:0] IFUBusAdr,
 (* mark_debug = "true" *)	output logic 				IFUBusRead,
 (* mark_debug = "true" *)	output logic 				IFUStallF,
+(* mark_debug = "true" *) output logic [2:0]  IFUBurstType,
+(* mark_debug = "true" *) output logic [1:0]  IFUTransType,
+(* mark_debug = "true" *) output logic        IFUTransComplete,
 	(* mark_debug = "true" *) output logic [`XLEN-1:0] PCF, 
 	// Execute
 	output logic [`XLEN-1:0] 	PCLinkE,
@ -201,8 +205,8 @@ module ifu (
    
    busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) 
    busdp(.clk, .reset,
-          .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusWriteCrit(),
-          .LSUBusRead(IFUBusRead), .LSUBusSize(), 
+          .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
+          .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
          .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr),
          .WordCount(), 
          .DCacheFetchLine(ICacheFetchLine),
--- a/pipelined/src/lsu/busdp.sv
+++ b/pipelined/src/lsu/busdp.sv
@ -40,9 +40,13 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
  // bus interface
  input logic [`XLEN-1:0]     LSUBusHRDATA,
  input logic                 LSUBusAck,
+  input logic                 LSUBusInit,
  output logic                LSUBusWrite,
  output logic                LSUBusRead,
-  output logic [2:0]          LSUBusSize, 
+  output logic [2:0]          LSUBusSize,
+  output logic [2:0]          LSUBurstType,
+  output logic [1:0]          LSUTransType, // For AHBLite
+  output logic                LSUTransComplete,
  input logic [2:0]           LSUFunct3M,
  output logic [`PA_BITS-1:0] LSUBusAdr, // ** change name to HADDR to make ahb lite.
  output logic [LOGWPL-1:0]   WordCount,
@ -66,13 +70,15 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
  
  localparam integer   WordCountThreshold = CACHE_ENABLED ? WORDSPERLINE - 1 : 0;
  logic [`PA_BITS-1:0]        LocalLSUBusAdr;
+  logic [LOGWPL-1:0]   WordCountDelayed;
+

  // *** implement flops as an array if feasbile; DCacheBusWriteData might be a problem
  // *** better name than DCacheBusWriteData
  genvar                      index;
  for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
    logic [WORDSPERLINE-1:0] CaptureWord;
-    assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCount);
+    assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed);
    flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA),
      .q(DCacheBusWriteData[(index+1)*`XLEN-1:index*`XLEN]));
  end
@ -83,6 +89,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)

  busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm(
    .clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
-		.LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
-		.DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount);
+		.LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
+		.LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed);
 endmodule
--- a/pipelined/src/lsu/busfsm.sv
+++ b/pipelined/src/lsu/busfsm.sv
@ -41,6 +41,7 @@ module busfsm #(parameter integer   WordCountThreshold,
   input logic               DCacheFetchLine,
   input logic               DCacheWriteLine,
   input logic               LSUBusAck,
+   input logic               LSUBusInit, // This might be better as LSUBusLock, or to send this using LSUBusAck.
   input logic               CPUBusy,
   input logic               CacheableM,

@ -48,10 +49,13 @@ module busfsm #(parameter integer   WordCountThreshold,
   output logic              LSUBusWrite,
   output logic              LSUBusWriteCrit,
   output logic              LSUBusRead,
+   output logic [2:0]        LSUBurstType,
+   output logic              LSUTransComplete,
+   output logic [1:0]        LSUTransType,
   output logic              DCacheBusAck,
   output logic              BusCommittedM,
   output logic              SelUncachedAdr,
-   output logic [LOGWPL-1:0] WordCount);
+   output logic [LOGWPL-1:0] WordCount, WordCountDelayed);
  

  
@ -61,7 +65,8 @@ module busfsm #(parameter integer   WordCountThreshold,
  logic 			   CntReset;
  logic 			   WordCountFlag;
  logic [LOGWPL-1:0]   NextWordCount;
-  logic 			   UnCachedAccess;
+  logic 			   UnCachedAccess, UnCachedRW;
+  logic [2:0]    LocalBurstType;
  

  typedef enum logic [2:0] {STATE_BUS_READY,
@ -75,18 +80,27 @@ module busfsm #(parameter integer   WordCountThreshold,

  (* mark_debug = "true" *) busstatetype BusCurrState, BusNextState;

-
+  // Used to send address for address stage of AHB.
  flopenr #(LOGWPL) 
  WordCountReg(.clk(clk),
 		.reset(reset | CntReset),
 		.en(CntEn),
 		.d(NextWordCount),
-		.q(WordCount));
+		.q(WordCount));  
+  
+  // Used to store data from data phase of AHB.
+  flopenr #(LOGWPL) 
+  WordCountDelayedReg(.clk(clk),
+		.reset(reset | CntReset),
+		.en(CntEn),
+		.d(WordCount),
+		.q(WordCountDelayed));

  assign NextWordCount = WordCount + 1'b1;

-  assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]);
-  assign CntEn = PreCntEn & LSUBusAck;
+  assign PreCntEn = (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_WRITE);
+  assign WordCountFlag = (WordCountDelayed == WordCountThreshold[LOGWPL-1:0]); // Detect when we are waiting on the final access.
+  assign CntEn = (PreCntEn & LSUBusAck | (LSUBusInit)) & ~WordCountFlag & ~UnCachedRW; // Want to count when doing cache accesses and we aren't wrapping up.

  assign UnCachedAccess = ~CACHE_ENABLED | ~CacheableM;

@ -120,14 +134,29 @@ module busfsm #(parameter integer   WordCountThreshold,
 	endcase
  end

+  always_comb begin
+    case(WordCountThreshold)
+      0:        LocalBurstType = 3'b000;
+      3:        LocalBurstType = 3'b011; // INCR4
+      7:        LocalBurstType = 3'b101; // INCR8
+      15:       LocalBurstType = 3'b111; // INCR16
+      default:  LocalBurstType = 3'b001; // INCR without end.
+    endcase
+  end

-  assign CntReset = BusCurrState == STATE_BUS_READY;
+  // Would these be better as always_comb statements or muxes?
+  assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access.
+  assign LSUTransComplete = (UnCachedRW) ? LSUBusAck : WordCountFlag & LSUBusAck;
+  // Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up.
+  assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00; 
+  // Reset if we aren't initiating a transaction or if we are finishing a transaction.
+  assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete; 
+  
  assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((UnCachedAccess & (|LSURWM)) | DCacheFetchLine | DCacheWriteLine)) |
 					(BusCurrState == STATE_BUS_UNCACHED_WRITE) |
 					(BusCurrState == STATE_BUS_UNCACHED_READ) |
 					(BusCurrState == STATE_BUS_FETCH)  |
 					(BusCurrState == STATE_BUS_WRITE);
-  assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE;
  assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) |
 							   (BusCurrState == STATE_BUS_UNCACHED_WRITE);
  assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
@ -139,6 +168,10 @@ module busfsm #(parameter integer   WordCountThreshold,
 							  (BusCurrState == STATE_BUS_UNCACHED_READ);
  assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);

+
+  // Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache.
+  assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead; 
+
  assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LSUBusAck) |
 						(BusCurrState == STATE_BUS_WRITE & WordCountFlag & LSUBusAck);
  assign BusCommittedM = BusCurrState != STATE_BUS_READY;
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@ -66,9 +66,13 @@ module lsu (
   (* mark_debug = "true" *)   output logic LSUBusRead, 
   (* mark_debug = "true" *)   output logic LSUBusWrite,
   (* mark_debug = "true" *)   input logic LSUBusAck,
+   (* mark_debug = "true" *)   input logic LSUBusInit,
   (* mark_debug = "true" *)   input logic [`XLEN-1:0] LSUBusHRDATA,
   (* mark_debug = "true" *)   output logic [`XLEN-1:0] LSUBusHWDATA,
   (* mark_debug = "true" *)   output logic [2:0] LSUBusSize, 
+   (* mark_debug = "true" *)   output logic [2:0] LSUBurstType,
+   (* mark_debug = "true" *)   output logic [1:0] LSUTransType,
+   (* mark_debug = "true" *)   output logic LSUTransComplete,
            // page table walker
   input logic [`XLEN-1:0]  SATP_REGW, // from csr
   input logic              STATUS_MXR, STATUS_SUM, STATUS_MPRV,
@ -211,7 +215,7 @@ module lsu (
            
    busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp(
      .clk, .reset,
-      .LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize,
+      .LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
      .WordCount, .LSUBusWriteCrit,
      .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine,
      .DCacheWriteLine, .DCacheBusAck, .DCacheBusWriteData, .LSUPAdrM,
--- a/pipelined/src/uncore/gpio.sv
+++ b/pipelined/src/uncore/gpio.sv
@ -48,7 +48,7 @@ module gpio (

  logic [31:0] input0d, input1d, input2d, input3d;
  logic [31:0] input_val, input_en, output_en, output_val;
-  logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip; 
+  logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor; 

  logic initTrans, memwrite;
  logic [7:0] entry, entryd;
@ -91,6 +91,7 @@ module gpio (
      high_ip <= #1 0;
      low_ie <= #1 0;
      low_ip <= #1 0;
+      out_xor <= #1 0;
    end else begin
      // writes
      if (memwrite)
@ -104,7 +105,7 @@ module gpio (
          8'h20: fall_ie <= #1 Din;
          8'h28: high_ie <= #1 Din;
          8'h30: low_ie  <= #1 Din;
-          8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR
+          8'h40: out_xor <= #1 Din; 
        endcase
        /* verilator lint_on CASEINCOMPLETE */
      // reads
@ -121,7 +122,7 @@ module gpio (
        8'h2C: Dout <= #1 high_ip;
        8'h30: Dout <= #1 low_ie;
        8'h34: Dout <= #1 low_ip;
-        8'h40: Dout <= #1 0; // OUT_XOR reads as 0
+        8'h40: Dout <= #1 out_xor;
        default: Dout <= #1 0;
      endcase
      // interrupts
@ -152,7 +153,7 @@ module gpio (
  flop #(32) sync2(HCLK,input1d,input2d);
  flop #(32) sync3(HCLK,input2d,input3d);
  assign input_val = input3d;
-  assign GPIOPinsOut = output_val;
+  assign GPIOPinsOut = output_val ^ out_xor;
  assign GPIOPinsEn = output_en;

  assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)};
--- a/pipelined/src/uncore/ram.sv
+++ b/pipelined/src/uncore/ram.sv
@ -43,77 +43,37 @@ module ram #(parameter BASE=0, RANGE = 65535) (
  output logic             HRESPRam, HREADYRam
 );

-  // Desired changes.
-  // 1. find a way to merge read and write address into 1 port.
-  // 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.)
-  // 3. implement burst.
-  // 4. remove the configurable latency.
+  localparam ADDR_WIDTH = $clog2(RANGE/8);
+  localparam OFFSET = $clog2(`XLEN/8);   

  logic [`XLEN/8-1:0] 		  ByteMask;
  logic [31:0]        HADDRD, RamAddr;
-  //logic				  prevHREADYRam, risingHREADYRam;
  logic				  initTrans;
  logic				  memwrite, memwriteD, memread;
  logic         nextHREADYRam;
-  //logic [3:0] 		  busycount;
-  
-  swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));

+  // a new AHB transaction starts when HTRANS requests a transaction,
+  // the peripheral is selected, and the previous transaction is completing
  assign initTrans = HREADY & HSELRam & (HTRANS[1]); 
-  assign memwrite = initTrans & HWRITE;  // *** why is initTrans needed?  See CLINT interface
+  assign memwrite = initTrans & HWRITE;  
  assign memread = initTrans & ~HWRITE;
 
  flopenr #(1) memwritereg(HCLK, ~HRESETn, HREADY, memwrite, memwriteD); 
  flopenr #(32)   haddrreg(HCLK, ~HRESETn, HREADY, HADDR, HADDRD);

-/*  // busy FSM to extend READY signal
-  always @(posedge HCLK, negedge HRESETn) 
-    if (~HRESETn) begin
-      busycount <= 0;
-      HREADYRam <= #1 0;
-    end else begin
-      if (initTrans) begin
-        busycount <= 0;
-        HREADYRam <= #1 0;
-      end else if (~HREADYRam) begin
-        if (busycount == 0) begin // Ram latency, for testing purposes.  *** test with different values such as 2
-          HREADYRam <= #1 1;
-        end else begin
-          busycount <= busycount + 1;
-        end
-      end
-    end */
-
-
  // Stall on a read after a write because the RAM can't take both adddresses on the same cycle
  assign nextHREADYRam = ~(memwriteD & memread);
-// assign nextHREADYRam = ~(memwriteD & ~memwrite);
  flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam);
-//  assign HREADYRam = ~(memwriteD & ~memwrite);
  assign HRESPRam = 0; // OK

-  localparam ADDR_WIDTH = $clog2(RANGE/8);
-  localparam OFFSET = $clog2(`XLEN/8);
-  
-/*  // Rising HREADY edge detector
-  //   Indicates when ram is finishing up
-  //   Needed because HREADY may go high for other reasons,
-  //   and we only want to write data when finishing up.
-  flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
-  assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/
-
-/*
- bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
-  memory(.clk(HCLK), .reA(1'b1),
-		 .addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
-		 .weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
-		 .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */
-
-    
-
  // On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address
  mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr);

+  // Byte mask for subword writes
+  // *** the CLINT and other peripherals duplicate this hardware
+  // *** it should be centralized and sent over HWSTRB
+  swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
+
  // single-ported RAM
  bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH)
    memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA));  
--- a/pipelined/src/wally/wallypipelinedcore.sv
+++ b/pipelined/src/wally/wallypipelinedcore.sv
@ -93,10 +93,12 @@ module wallypipelinedcore (
  logic             FWriteIntE;
  logic [`XLEN-1:0]         FWriteDataE;
  logic [`XLEN-1:0]         FIntResM;  
+  logic [`XLEN-1:0]         FCvtIntResW;  
  logic             FDivBusyE;
  logic             IllegalFPUInstrD, IllegalFPUInstrE;
  logic             FRegWriteM;
  logic             FPUStallD;
+  logic [1:0]       FResSelW;
  logic [4:0]             SetFflagsM;

  // memory management unit signals
@ -134,13 +136,16 @@ module wallypipelinedcore (
  logic [`PA_BITS-1:0]         IFUBusAdr;
  logic [`XLEN-1:0]         IFUBusHRDATA;
  logic             IFUBusRead;
-  logic             IFUBusAck;
+  logic             IFUBusAck, IFUBusInit;
+  logic [2:0]       IFUBurstType;
+  logic [1:0]       IFUTransType;
+  logic             IFUTransComplete;
  
  // AHB LSU interface
  logic [`PA_BITS-1:0]         LSUBusAdr;
  logic             LSUBusRead;
  logic             LSUBusWrite;
-  logic             LSUBusAck;
+  logic             LSUBusAck, LSUBusInit;
  logic [`XLEN-1:0]         LSUBusHRDATA;
  logic [`XLEN-1:0]         LSUBusHWDATA;
  
@ -152,6 +157,9 @@ module wallypipelinedcore (
  logic [4:0]             InstrClassM;
  logic             InstrAccessFaultF;
  logic [2:0]             LSUBusSize;
+  logic [2:0]             LSUBurstType;
+  logic [1:0]             LSUTransType;
+  logic             LSUTransComplete;
  
  logic             DCacheMiss;
  logic             DCacheAccess;
@ -166,8 +174,8 @@ module wallypipelinedcore (
    .StallF, .StallD, .StallE, .StallM, 
    .FlushF, .FlushD, .FlushE, .FlushM, 
    // Fetch
-    .IFUBusHRDATA, .IFUBusAck, .PCF, .IFUBusAdr,
-    .IFUBusRead, .IFUStallF,
+    .IFUBusHRDATA, .IFUBusAck, .IFUBusInit, .PCF, .IFUBusAdr,
+    .IFUBusRead, .IFUStallF, .IFUBurstType, .IFUTransType, .IFUTransComplete,
    .ICacheAccess, .ICacheMiss,

    // Execute
@ -224,6 +232,8 @@ module wallypipelinedcore (
     .CSRReadValW, .ReadDataM, .MDUResultW,
     .RdW, .ReadDataW,
     .InstrValidM, 
+     .FCvtIntResW,
+     .FResSelW,

     // hazards
     .StallD, .StallE, .StallM, .StallW,
@ -247,8 +257,8 @@ module wallypipelinedcore (
  .IEUAdrE, .IEUAdrM, .WriteDataE,
  .ReadDataM, .FlushDCacheM,
  // connected to ahb (all stay the same)
-  .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck,
-  .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize,
+  .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
+  .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,

    // connect to csr or privilege and stay the same.
    .PrivilegeModeW, .BigEndianM,          // connects to csr
@ -279,13 +289,22 @@ module wallypipelinedcore (
  ahblite ebu(// IFU connections
     .clk, .reset,
     .UnsignedLoadM(1'b0), .AtomicMaskedM(2'b00),
-     .IFUBusAdr,
-     .IFUBusRead, .IFUBusHRDATA, .IFUBusAck,
+     .IFUBusAdr, .IFUBusRead, 
+     .IFUBusHRDATA, 
+     .IFUBurstType, 
+     .IFUTransType, 
+     .IFUTransComplete,
+     .IFUBusAck, 
+     .IFUBusInit, 
     // Signals from Data Cache
     .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusHWDATA,
     .LSUBusHRDATA,
     .LSUBusSize,
+     .LSUBurstType,
+     .LSUTransType,
+     .LSUTransComplete,
     .LSUBusAck,
+     .LSUBusInit,
 
     .HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
     .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST,
@ -375,6 +394,8 @@ module wallypipelinedcore (
         .FWriteIntE, // integer register write enable
         .FWriteDataE, // Data to be written to memory
         .FIntResM, // data to be written to integer register
+         .FCvtIntResW, // fp -> int conversion result to be stored in int register
+         .FResSelW,   // fpu result selection
         .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
         .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
         .SetFflagsM        // FPU flags (to privileged unit)
--- a/pipelined/srt/stine/Makefile
+++ b/pipelined/srt/stine/Makefile
@ -0,0 +1,18 @@
+
+# Builds the srt4div executable from disp.c and srt4div.c.
+#   make         build srt4div
+#   make clean   remove object files, editor backups and core dumps
+
+CC     = gcc
+# Compiler flags only. Libraries are kept in LDLIBS so that overriding
+# CFLAGS on the command line (e.g. `make CFLAGS=-O2`) cannot drop -lm
+# from the link step, and so -lm is not passed to the -c compile steps.
+CFLAGS = -g
+# Libraries go after the objects on the link line; disp.c uses the math
+# library (pow, rint, floor, ceil, fabs).
+LDLIBS = -lm
+# Extra libraries supplied by the user (empty by default).
+LIBS   =
+OBJS   = disp.o srt4div.o
+
+# clean names an action, not a file: run it even if a file called
+# "clean" happens to exist in this directory.
+.PHONY: clean
+
+srt4div:	$(OBJS)
+	$(CC) $(CFLAGS) -O3 -o $@ $(OBJS) $(LDLIBS) $(LIBS)
+
+disp.o:	disp.h disp.c
+	$(CC) $(CFLAGS) -c -o $@ disp.c
+
+# srt4div.c includes disp.h, so the object must be rebuilt when the
+# header changes.
+srt4div.o:	srt4div.c disp.h
+	$(CC) $(CFLAGS) -c -o $@ srt4div.c
+
+clean:
+	rm -f *.o *~
+	rm -f core
--- a/pipelined/srt/stine/disp.c
+++ b/pipelined/srt/stine/disp.c
@ -0,0 +1,60 @@
+#include "disp.h"
+
+/*
+ * Round x toward zero at the resolution selected by bits: negative values
+ * are handed to ceiling(), everything else to flr().
+ */
+double rnd_zero(double x, double bits) {
+  return (x < 0) ? ceiling(x, bits) : flr(x, bits);
+}
+
+/*
+ * Scale x by 2^precision, round to an integer value with rint() (which
+ * follows the current floating-point rounding mode), then scale back.
+ */
+double rne(double x, double precision) {
+  const double scale = pow(2.0, precision);
+
+  return rint(x * scale) / scale;
+}
+
+/*
+ * Scale x by 2^precision, take floor() of the product, then scale back:
+ * the result is x rounded toward minus infinity on that grid.
+ */
+double flr(double x, double precision) {
+  const double scale = pow(2.0, precision);
+
+  return floor(x * scale) / scale;
+}
+
+/*
+ * Scale x by 2^precision, take ceil() of the product, then scale back:
+ * the result is x rounded toward plus infinity on that grid.
+ */
+double ceiling(double x, double precision) {
+  const double scale = pow(2.0, precision);
+
+  return ceil(x * scale) / scale;
+}
+
+/*
+ * Print x to out_file as a fixed-point binary string.
+ *
+ * One digit is written for every weight from 2^(bits_to_left-1) down to
+ * 2^-bits_to_right, and a '.' is written right after the 2^0 digit (so the
+ * point appears whenever that digit is part of the output).
+ *
+ *   - A magnitude smaller than 2^-bits_to_right is printed as zero.
+ *   - Any other negative value is printed as 2^bits_to_left + x, i.e. in
+ *     two's-complement form.
+ *
+ * No newline is written; nothing is returned.
+ */
+void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file) {
+
+  double diff;
+  int i;
+
+  /* Too small to set even the last digit: treat it as exactly zero and let
+     the common loop print it, so the binary point is emitted for zero in
+     the same position as for every other value. */
+  if (fabs(x) <  pow(2.0, -bits_to_right)) {
+    x = 0.0;
+  }
+  /* Encode a negative value as its two's complement. */
+  if (x < 0.0) {
+    x = pow(2.0, ((double) bits_to_left)) + x;
+  }
+  /* Peel off one digit per weight, most significant first. */
+  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
+    diff = pow(2.0, -i);
+    if (x < diff) {
+      fprintf(out_file, "0");
+    }
+    else {
+      fprintf(out_file, "1");
+      x -= diff;
+    }
+    if (i == 0) {
+      fprintf(out_file, ".");
+    }
+  }
+}
+
--- a/pipelined/srt/stine/disp.h
+++ b/pipelined/srt/stine/disp.h
@ -0,0 +1,18 @@
+#include <stdlib.h>
+#include <math.h>
+#include <stdio.h>
+
+// Declarations for the rounding and binary-display helpers in disp.c.
+#ifndef DISP
+#define DISP
+
+// Rounds x toward zero at a resolution of 2^-bits: ceiling() for negative
+// x, flr() otherwise.
+double rnd_zero(double x, double bits);
+
+// Rounds x to a multiple of 2^-precision using rint().
+double rne(double x, double precision);
+
+// Rounds x down to a multiple of 2^-precision using floor().
+double flr(double x, double precision);
+
+// Rounds x up to a multiple of 2^-precision using ceil().
+double ceiling(double x, double precision);
+
+// Writes x to out_file as binary digits, bits_to_left of them for the integer
+// part and bits_to_right for the fraction. Values whose magnitude is below
+// 2^-bits_to_right are written as zeros; other negative values have
+// 2^bits_to_left added first (two's complement form).
+void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file);
+
+#endif 
--- a/pipelined/srt/stine/intdiv.sv
+++ b/pipelined/srt/stine/intdiv.sv
@ -195,7 +195,7 @@ module divide4 #(parameter WIDTH=64)
   logic [WIDTH:0] 	     Qstar;   
   logic [WIDTH:0] 	     QMstar;
   logic [WIDTH:0] 	     QM2star;   
-   logic [6:0] 		     qtotal;   
+   logic [7:0] 		     qtotal;   
   logic [WIDTH+3:0] 	     SumN, CarryN, SumN2, CarryN2;
   logic [WIDTH+3:0] 	     divi1, divi2, divi1c, divi2c, dive1;
   logic [WIDTH+3:0] 	     mdivi_temp, mdivi;   
@ -219,9 +219,9 @@ module divide4 #(parameter WIDTH=64)
   mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN);
   mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN);
   // Simplify QST
-   adder #(7) cpa1 (SumN[WIDTH+3:WIDTH-3], CarryN[WIDTH+3:WIDTH-3], qtotal);   
+   adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal);   
   // q = {+2, +1, -1, -2} else q = 0
-   qst4 pd1 (qtotal[6:0], divi1[WIDTH-1:WIDTH-3], quotient);
+   qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient);
   assign ulp = quotient[2]|quotient[3];
   assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]);
   // Map to binary encoding
--- a/pipelined/srt/stine/srt4div
+++ b/pipelined/srt/stine/srt4div
--- a/pipelined/srt/stine/srt4div.c
+++ b/pipelined/srt/stine/srt4div.c
@ -0,0 +1,325 @@
+#include "disp.h"
+#include <math.h>
+
+/*
+ * qslc: quotient-digit selection for the radix-4 SRT division model.
+ *
+ *   prem  estimate of the shifted partial remainder
+ *         (main passes flr(4*P*16, 3))
+ *   d     estimate of the divisor on the same scale (main passes D*16)
+ *
+ * Returns the selected digit, one of -2, -1, 0, +1, +2.
+ *
+ * The divisor estimate picks one of sixteen unit-wide intervals [k, k+1),
+ * k = 0..15, and the remainder estimate is compared against the thresholds
+ * stored for that interval.  Both arguments are echoed to stdout on every
+ * call.
+ *
+ * If d is outside [0, 16) (or is NaN) no interval applies.  A diagnostic is
+ * written to stderr and 0 is returned, so the caller always receives a
+ * defined digit instead of the result of running off the end of the function.
+ */
+int qslc (double prem, double d) {
+
+  /* One row per divisor interval [k, k+1):
+       [0] outer      prem >= outer  selects +2, prem < -outer selects -2
+       [1] inner_pos  prem >= inner_pos selects +1
+       [2] inner_neg  prem >= inner_neg selects 0, anything lower selects -1
+     Row 1 deliberately keeps its two slightly different inner constants
+     (0.66667 and -0.6667) so that results match the earlier if-ladder. */
+  static const double sel[16][3] = {
+    {  1.0,     0.25,    -0.25   },   /* 0 <= d < 1  */
+    {  2.0,     0.66667, -0.6667 },   /* 1 <= d < 2  */
+    {  4.0,     1.25,    -1.25   },   /* 2 <= d < 3  */
+    {  5.0,     2.0,     -2.0    },   /* 3 <= d < 4  */
+    {  6.66667, 2.0,     -2.0    },   /* 4 <= d < 5  */
+    {  8.0,     2.0,     -2.0    },   /* 5 <= d < 6  */
+    { 10.0,     4.0,     -4.0    },   /* 6 <= d < 7  */
+    { 11.0,     4.0,     -4.0    },   /* 7 <= d < 8  */
+    { 12.0,     4.0,     -4.0    },   /* 8 <= d < 9  */
+    { 15.0,     4.0,     -4.0    },   /* 9 <= d < 10 */
+    { 15.0,     4.0,     -4.0    },   /* 10 <= d < 11 */
+    { 16.0,     4.0,     -4.0    },   /* 11 <= d < 12 */
+    { 20.0,     8.0,     -8.0    },   /* 12 <= d < 13 */
+    { 20.0,     8.0,     -8.0    },   /* 13 <= d < 14 */
+    { 20.0,     8.0,     -8.0    },   /* 14 <= d < 15 */
+    { 24.0,     8.0,     -8.0    }    /* 15 <= d < 16 */
+  };
+  const double *t;
+
+  printf("d  --> %lg\n", d);
+  printf("rw --> %lg\n", prem);
+
+  /* Written as a negated conjunction so that NaN is rejected as well. */
+  if (!((d >= 0.0) && (d < 16.0))) {
+    fprintf(stderr,
+            "qslc: divisor estimate %lg is outside [0, 16); using q = 0\n", d);
+    return 0;
+  }
+
+  /* Truncation equals floor here because d is known to be non-negative. */
+  t = sel[(int) d];
+
+  if (prem >= t[0])
+    return 2;
+  if (prem >= t[1])
+    return 1;
+  if (prem >= t[2])
+    return 0;
+  if (prem >= -t[0])
+    return -1;
+  return -2;
+
+}
+
+/*
+ This program runs a radix-4 SRT division and prints a trace of every
+ iteration.
+
+ Usage: srt4div numerator denominator num_iterations prec
+
+   numerator, denominator  operands, rounded to prec fractional bits with
+                           rne() before use
+   num_iterations          number of radix-4 iterations to perform
+   prec                    number of fractional bits used for rounding and
+                           for the binary displays
+
+ The algorithm assumes that 0.5 <= D < 1.  The digit-selection routine only
+ covers divisor estimates D*16 in [0, 16), so after rounding the denominator
+ must lie strictly between 0 and 1; anything else is rejected.
+
+ Exit status is 0 on success and 1 when the arguments are missing, cannot
+ be parsed, or the denominator is out of range.
+*/
+
+int main(int argc, char* argv[]) {
+
+   double P, N, D, Q, RQ, RD, scale;
+   double shifted;
+   int q;
+   int num_iter, i;
+   int prec;
+   if (argc < 5) {
+      fprintf(stderr,
+	      "Usage: %s numerator denominator num_iterations prec\n", 
+	      argv[0]);
+      exit(1);
+   }
+   // Each conversion must succeed; otherwise the variable would be used
+   // without ever having been assigned.
+   if (sscanf(argv[1],"%lg", &N) != 1 ||
+       sscanf(argv[2],"%lg", &D) != 1 ||
+       sscanf(argv[3],"%d", &num_iter) != 1 ||
+       sscanf(argv[4],"%d", &prec) != 1) {
+      fprintf(stderr, "%s: could not parse the arguments as numbers\n",
+	      argv[0]);
+      fprintf(stderr,
+	      "Usage: %s numerator denominator num_iterations prec\n", 
+	      argv[0]);
+      exit(1);
+   }
+   // Round to precision
+   N = rne(N, prec);
+   D = rne(D, prec);
+   // Checked after rounding because rounding can move D onto 0 or 1.
+   // The negated form also rejects NaN.
+   if (!(D > 0.0 && D < 1.0)) {
+      fprintf(stderr,
+	      "%s: denominator rounds to %lg; it must satisfy 0 < D < 1\n",
+	      argv[0], D);
+      exit(1);
+   }
+   printf("N = ");
+   disp_bin(N, 3, prec, stdout);
+   printf("\n");
+   printf("D = ");
+   disp_bin(D, 3, prec, stdout);
+   printf("\n");
+
+   Q = 0;
+   // The dividend is pre-scaled by 1/4; the quotient is scaled back by 4
+   // when it is reported below.
+   P = N*0.25;
+   printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n", 
+	  N, D, N/D, num_iter); 
+   for (scale = 1, i = 0; i < num_iter; i++) {
+     // Shift by r
+     scale = scale*0.25;
+     shifted = 4*P;
+     q = qslc(flr(shifted*16,3), D*16);
+     printf("4*W[n] = ");
+     disp_bin(shifted,3,prec,stdout);
+     printf("\n");
+     printf("q*D = ");      
+     disp_bin(q*D,3,prec,stdout);
+     printf("\n");
+     // Recurrence (done before the W[n+1] line so that line shows the new
+     // remainder rather than the old one)
+     P = shifted - q*D;
+     printf("W[n+1] = ");            
+     disp_bin(P ,3,prec,stdout);
+     printf("\n");
+     // OTFC
+     Q = Q + q*scale;
+     printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P); 
+     printf("i = %d, q = %d", i, q);
+     printf(", Q = ");
+     disp_bin(Q, 3, prec, stdout);
+     printf(", W = ");
+     disp_bin(P, 3, prec, stdout);
+     printf("\n\n");
+   }
+   // A negative final remainder means the quotient is one unit in the last
+   // place too large: step it down and add the divisor back.
+   if (P < 0) {
+     Q = Q - scale;
+     P = P + D;
+     printf("\nCorrecting Negative Remainder\n"); 
+     printf("Q = %1.18lf, W = %1.18lf\n", Q, P); 
+     printf("Q = ");
+     disp_bin(Q, 3, prec, stdout);
+     printf(", W = ");
+     disp_bin(P, 3, prec, stdout);
+     printf("\n");
+   } 
+   RQ = flr(N/D, (double) prec);
+   RD = Q*4;
+   printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
+   printf("true = ");
+   disp_bin(RQ, 3, prec, stdout);
+   printf(", computed = ");
+   disp_bin(RD, 3, prec, stdout);
+   printf("\n\n");
+   printf("REM = %1.18lf \n", P);
+   printf("REM = ");
+   disp_bin(P, 3, prec, stdout);
+   printf("\n\n");
+
+   return 0;
+
+}
--- a/pipelined/srt/testvectors
+++ b/pipelined/srt/testvectors
@ -1,289 +0,0 @@
-4000000000000000_4000000000000000_3ff0000000000000
-c018000000000000_4000000000000000_c008000000000000
-4024000000000000_4000000000000000_4014000000000000
-c032000000000000_4000000000000000_c022000000000000
-4041000000000000_4000000000000000_4031000000000000
-c05c000000000000_4000000000000000_c04c000000000000
-406e000000000000_4000000000000000_405e000000000000
-c07ffff583a53b8e_4000000000000000_c06ffff583a53b8e
-408199999999999a_4000000000000000_407199999999999a
-c093333333333333_4000000000000000_c083333333333333
-40a028f5c28f5c29_4000000000000000_409028f5c28f5c29
-c0b004189374bc6a_4000000000000000_c0a004189374bc6a
-40c00068db8bac71_4000000000000000_40b00068db8bac71
-c0dd1745d1745d17_4000000000000000_c0cd1745d1745d17
-40e5555555555555_4000000000000000_40d5555555555555
-c0f999999999999a_4000000000000000_c0e999999999999a
-410c71c71c71c71c_4000000000000000_40fc71c71c71c71c
-4000000000000000_c018000000000000_bfe5555555555555
-c018000000000000_c018000000000000_3ff0000000000000
-4024000000000000_c018000000000000_c00aaaaaaaaaaaab
-c032000000000000_c018000000000000_4018000000000000
-4041000000000000_c018000000000000_c026aaaaaaaaaaab
-c05c000000000000_c018000000000000_4032aaaaaaaaaaab
-406e000000000000_c018000000000000_c044000000000000
-c07ffff583a53b8e_c018000000000000_4055554e57c37d09
-408199999999999a_c018000000000000_c067777777777778
-c093333333333333_c018000000000000_4079999999999999
-40a028f5c28f5c29_c018000000000000_c0858bf258bf258c
-c0b004189374bc6a_c018000000000000_40955acb6f46508d
-40c00068db8bac71_c018000000000000_c0a555e124ba3b41
-c0dd1745d1745d17_c018000000000000_40b364d9364d9365
-40e5555555555555_c018000000000000_c0cc71c71c71c71c
-c0f999999999999a_c018000000000000_40d1111111111111
-410c71c71c71c71c_c018000000000000_c0e2f684bda12f68
-4000000000000000_4024000000000000_3fd999999999999a
-c018000000000000_4024000000000000_bfe3333333333333
-4024000000000000_4024000000000000_3ff0000000000000
-c032000000000000_4024000000000000_c00ccccccccccccd
-4041000000000000_4024000000000000_401b333333333333
-c05c000000000000_4024000000000000_c026666666666666
-406e000000000000_4024000000000000_4038000000000000
-c07ffff583a53b8e_4024000000000000_c0499991361dc93e
-408199999999999a_4024000000000000_405c28f5c28f5c2a
-c093333333333333_4024000000000000_c06eb851eb851eb8
-40a028f5c28f5c29_4024000000000000_4079db22d0e56042
-c0b004189374bc6a_4024000000000000_c089a027525460aa
-40c00068db8bac71_4024000000000000_40999a415f45e0b5
-c0dd1745d1745d17_4024000000000000_c0a745d1745d1746
-40e5555555555555_4024000000000000_40b1111111111111
-c0f999999999999a_4024000000000000_c0c47ae147ae147b
-410c71c71c71c71c_4024000000000000_40d6c16c16c16c16
-4000000000000000_c032000000000000_bfcc71c71c71c71c
-c018000000000000_c032000000000000_3fd5555555555555
-4024000000000000_c032000000000000_bfe1c71c71c71c72
-c032000000000000_c032000000000000_3ff0000000000000
-4041000000000000_c032000000000000_c00e38e38e38e38e
-c05c000000000000_c032000000000000_4018e38e38e38e39
-406e000000000000_c032000000000000_c02aaaaaaaaaaaab
-c07ffff583a53b8e_c032000000000000_403c71bdca59fc0c
-408199999999999a_c032000000000000_c04f49f49f49f4a0
-c093333333333333_c032000000000000_4051111111111111
-40a028f5c28f5c29_c032000000000000_c06cba9876543210
-c0b004189374bc6a_c032000000000000_407c790f3f086b67
-40c00068db8bac71_c032000000000000_c08c7281864da457
-c0dd1745d1745d17_c032000000000000_4099dbcc48676f31
-40e5555555555555_c032000000000000_c0a2f684bda12f68
-c0f999999999999a_c032000000000000_40b6c16c16c16c17
-410c71c71c71c71c_c032000000000000_c0c948b0fcd6e9e0
-4000000000000000_4041000000000000_3fbe1e1e1e1e1e1e
-c018000000000000_4041000000000000_bfc6969696969697
-4024000000000000_4041000000000000_3fd2d2d2d2d2d2d3
-c032000000000000_4041000000000000_bfe0f0f0f0f0f0f1
-4041000000000000_4041000000000000_3ff0000000000000
-c05c000000000000_4041000000000000_c00a5a5a5a5a5a5a
-406e000000000000_4041000000000000_401c3c3c3c3c3c3c
-c07ffff583a53b8e_4041000000000000_c02e1e143faa9268
-408199999999999a_4041000000000000_4030909090909091
-c093333333333333_4041000000000000_c042121212121212
-40a028f5c28f5c29_4041000000000000_405e6b3804d19e6b
-c0b004189374bc6a_4041000000000000_c06e25d3e863448b
-40c00068db8bac71_4041000000000000_407e1ee37f25085c
-c0dd1745d1745d17_4041000000000000_c08b6132a7041b61
-40e5555555555555_4041000000000000_4094141414141414
-c0f999999999999a_4041000000000000_c0a8181818181818
-410c71c71c71c71c_4041000000000000_40bac5701ac5701a
-4000000000000000_c05c000000000000_bfa2492492492492
-c018000000000000_c05c000000000000_3fbb6db6db6db6db
-4024000000000000_c05c000000000000_bfc6db6db6db6db7
-c032000000000000_c05c000000000000_3fd4924924924925
-4041000000000000_c05c000000000000_bfe36db6db6db6db
-c05c000000000000_c05c000000000000_3ff0000000000000
-406e000000000000_c05c000000000000_c001249249249249
-c07ffff583a53b8e_c05c000000000000_4012491e945e6b2d
-408199999999999a_c05c000000000000_c0241d41d41d41d5
-c093333333333333_c05c000000000000_4035f15f15f15f16
-40a028f5c28f5c29_c05c000000000000_c04277f44c118de6
-c0b004189374bc6a_c05c000000000000_40524dd2f1a9fbe7
-40c00068db8bac71_c05c000000000000_c062499c689fa081
-c0dd1745d1745d17_c05c000000000000_40709f959c427e56
-40e5555555555555_c05c000000000000_c088618618618618
-c0f999999999999a_c05c000000000000_409d41d41d41d41e
-410c71c71c71c71c_c05c000000000000_c0a0410410410410
-4000000000000000_406e000000000000_3f91111111111111
-c018000000000000_406e000000000000_bfa999999999999a
-4024000000000000_406e000000000000_3fb5555555555555
-c032000000000000_406e000000000000_bfc3333333333333
-4041000000000000_406e000000000000_3fd2222222222222
-c05c000000000000_406e000000000000_bfedddddddddddde
-406e000000000000_406e000000000000_3ff0000000000000
-c07ffff583a53b8e_406e000000000000_c001110b796930d4
-408199999999999a_406e000000000000_4012c5f92c5f92c6
-c093333333333333_406e000000000000_c0247ae147ae147b
-40a028f5c28f5c29_406e000000000000_40313cc1e098ead6
-c0b004189374bc6a_406e000000000000_c041156f8c384071
-40c00068db8bac71_406e000000000000_40511180ea2e95ce
-c0dd1745d1745d17_406e000000000000_c06f07c1f07c1f07
-40e5555555555555_406e000000000000_4076c16c16c16c16
-c0f999999999999a_406e000000000000_c08b4e81b4e81b4f
-410c71c71c71c71c_406e000000000000_409e573ac901e573
-4000000000000000_c07ffff583a53b8e_bf8000053e2f1a08
-c018000000000000_c07ffff583a53b8e_3f980007dd46a70b
-4024000000000000_c07ffff583a53b8e_bfa400068dbae089
-c032000000000000_c07ffff583a53b8e_3fb20005e5f4fd48
-4041000000000000_c07ffff583a53b8e_bfc1000592120ba8
-c05c000000000000_c07ffff583a53b8e_3fdc00092cd26d8d
-406e000000000000_c07ffff583a53b8e_bfee0009d49850ce
-c07ffff583a53b8e_c07ffff583a53b8e_3ff0000000000000
-408199999999999a_c07ffff583a53b8e_c001999f5e009ca2
-c093333333333333_c07ffff583a53b8e_401333397dd21f3c
-40a028f5c28f5c29_c07ffff583a53b8e_c02028fb0e2a73e4
-c0b004189374bc6a_c07ffff583a53b8e_4030041dd2fb6fd0
-40c00068db8bac71_c07ffff583a53b8e_c040006e19dd229c
-c0dd1745d1745d17_c07ffff583a53b8e_405d174f59ca00c8
-40e5555555555555_c07ffff583a53b8e_c065555c52e9780a
-c0f999999999999a_c07ffff583a53b8e_407999a1fd1829a6
-410c71c71c71c71c_c07ffff583a53b8e_c08c71d06e8ca00d
-4000000000000000_408199999999999a_3f7d1745d1745d17
-c018000000000000_408199999999999a_bf85d1745d1745d1
-4024000000000000_408199999999999a_3f922e8ba2e8ba2e
-c032000000000000_408199999999999a_bfa05d1745d1745d
-4041000000000000_408199999999999a_3fbee8ba2e8ba2e8
-c05c000000000000_408199999999999a_bfc9745d1745d174
-406e000000000000_408199999999999a_3fdb45d1745d1745
-c07ffff583a53b8e_408199999999999a_bfed173c4921d90c
-408199999999999a_408199999999999a_3ff0000000000000
-c093333333333333_408199999999999a_c001745d1745d174
-40a028f5c28f5c29_408199999999999a_401d61bed61bed61
-c0b004189374bc6a_408199999999999a_c02d1eb851eb851d
-40c00068db8bac71_408199999999999a_403d180477e6ade4
-c0dd1745d1745d17_408199999999999a_c04a723f789854a0
-40e5555555555555_408199999999999a_405364d9364d9364
-c0f999999999999a_408199999999999a_c06745d1745d1746
-410c71c71c71c71c_408199999999999a_4079dbcc48676f30
-4000000000000000_c093333333333333_bf6aaaaaaaaaaaab
-c018000000000000_c093333333333333_3f74000000000000
-4024000000000000_c093333333333333_bf80aaaaaaaaaaab
-c032000000000000_c093333333333333_3f9e000000000000
-4041000000000000_c093333333333333_bfac555555555556
-c05c000000000000_c093333333333333_3fb7555555555556
-406e000000000000_c093333333333333_bfc9000000000000
-c07ffff583a53b8e_c093333333333333_3fdaaaa1edb45c4c
-408199999999999a_c093333333333333_bfed555555555556
-c093333333333333_c093333333333333_3ff0000000000000
-40a028f5c28f5c29_c093333333333333_c00aeeeeeeeeeeef
-c0b004189374bc6a_c093333333333333_401ab17e4b17e4b1
-40c00068db8bac71_c093333333333333_c02aab596de8ca12
-c0dd1745d1745d17_c093333333333333_40383e0f83e0f83e
-40e5555555555555_c093333333333333_c041c71c71c71c72
-c0f999999999999a_c093333333333333_4055555555555556
-410c71c71c71c71c_c093333333333333_c067b425ed097b42
-4000000000000000_40a028f5c28f5c29_3f5faee41e6a7498
-c018000000000000_40a028f5c28f5c29_bf67c32b16cfd772
-4024000000000000_40a028f5c28f5c29_3f73cd4e930288df
-c032000000000000_40a028f5c28f5c29_bf81d260511be196
-4041000000000000_40a028f5c28f5c29_3f90d4e930288df1
-c05c000000000000_40a028f5c28f5c29_bfabb9079a9d2605
-406e000000000000_40a028f5c28f5c29_3fbdb3f5dc83cd4f
-c07ffff583a53b8e_40a028f5c28f5c29_bfcfaed9bca398bf
-408199999999999a_40a028f5c28f5c29_3fd16cfd7720f354
-c093333333333333_40a028f5c28f5c29_bfe30288df0cac5b
-40a028f5c28f5c29_40a028f5c28f5c29_3ff0000000000000
-c0b004189374bc6a_40a028f5c28f5c29_c00fb70081c635bb
-40c00068db8bac71_40a028f5c28f5c29_401fafb3c1f3a182
-c0dd1745d1745d17_40a028f5c28f5c29_c02ccd899003afd0
-40e5555555555555_40a028f5c28f5c29_40351f42bef1a310
-c0f999999999999a_40a028f5c28f5c29_c04958b67ebb907a
-410c71c71c71c71c_40a028f5c28f5c29_405c29ae53ecd96a
-4000000000000000_c0b004189374bc6a_bf4ff7d0f16c2e0a
-c018000000000000_c0b004189374bc6a_3f57f9dcb5112287
-4024000000000000_c0b004189374bc6a_bf63fae296e39cc6
-c032000000000000_c0b004189374bc6a_3f71fb6587ccd9e5
-4041000000000000_c0b004189374bc6a_bf80fba700417875
-c05c000000000000_c0b004189374bc6a_3f9bf8d6d33ea848
-406e000000000000_c0b004189374bc6a_bfadf853e2556b29
-c07ffff583a53b8e_c0b004189374bc6a_3fbff7c677bfebb5
-408199999999999a_c0b004189374bc6a_bfc1951951951953
-c093333333333333_c0b004189374bc6a_3fd32e4a2a741b9f
-40a028f5c28f5c29_c0b004189374bc6a_bfe024d3c19930d9
-c0b004189374bc6a_c0b004189374bc6a_3ff0000000000000
-40c00068db8bac71_c0b004189374bc6a_c00ff8a272e15ca2
-c0dd1745d1745d17_c0b004189374bc6a_401d0fd53890e409
-40e5555555555555_c0b004189374bc6a_c0254fe0a0f2c95b
-c0f999999999999a_c0b004189374bc6a_4039930d8df024d5
-410c71c71c71c71c_c0b004189374bc6a_c04c6a80d6990c7a
-4000000000000000_40c00068db8bac71_3f3fff2e4e46e7a8
-c018000000000000_40c00068db8bac71_bf47ff62bab52dbe
-4024000000000000_40c00068db8bac71_3f53ff7cf0ec50c9
-c032000000000000_40c00068db8bac71_bf61ff8a0c07e24f
-4041000000000000_40c00068db8bac71_3f70ff909995ab11
-c05c000000000000_40c00068db8bac71_bf8bff48847e0ab3
-406e000000000000_40c00068db8bac71_3f9dff3b6962792e
-c07ffff583a53b8e_40c00068db8bac71_bfafff23d230d9a4
-408199999999999a_40c00068db8bac71_3fb1992644a6ff6a
-c093333333333333_40c00068db8bac71_bfc332b5622a8afe
-40a028f5c28f5c29_40c00068db8bac71_3fd0288bdd4a34fd
-c0b004189374bc6a_40c00068db8bac71_bfe003af9fc0ed8b
-40c00068db8bac71_40c00068db8bac71_3ff0000000000000
-c0dd1745d1745d17_40c00068db8bac71_c00d16872fe35e3c
-40e5555555555555_40c00068db8bac71_401554c989849a70
-c0f999999999999a_40c00068db8bac71_c02998f1d838b954
-410c71c71c71c71c_40c00068db8bac71_403c710cb75b7895
-4000000000000000_c0dd1745d1745d17_bf2199999999999a
-c018000000000000_c0dd1745d1745d17_3f3a666666666667
-4024000000000000_c0dd1745d1745d17_bf46000000000000
-c032000000000000_c0dd1745d1745d17_3f53cccccccccccd
-4041000000000000_c0dd1745d1745d17_bf62b33333333333
-c05c000000000000_c0dd1745d1745d17_3f7ecccccccccccd
-406e000000000000_c0dd1745d1745d17_bf80800000000000
-c07ffff583a53b8e_c0dd1745d1745d17_3f919993d5347a5b
-408199999999999a_c0dd1745d1745d17_bfa35c28f5c28f5d
-c093333333333333_c0dd1745d1745d17_3fb51eb851eb851f
-40a028f5c28f5c29_c0dd1745d1745d17_bfc1c6a7ef9db22d
-c0b004189374bc6a_c0dd1745d1745d17_3fd19e1b089a0275
-40c00068db8bac71_c0dd1745d1745d17_bfe19a0cf1800a7c
-c0dd1745d1745d17_c0dd1745d1745d17_3ff0000000000000
-40e5555555555555_c0dd1745d1745d17_c007777777777777
-c0f999999999999a_c0dd1745d1745d17_401c28f5c28f5c2a
-410c71c71c71c71c_c0dd1745d1745d17_c02f49f49f49f49f
-4000000000000000_40e5555555555555_3f18000000000000
-c018000000000000_40e5555555555555_bf22000000000000
-4024000000000000_40e5555555555555_3f3e000000000000
-c032000000000000_40e5555555555555_bf4b000000000000
-4041000000000000_40e5555555555555_3f59800000000000
-c05c000000000000_40e5555555555555_bf65000000000000
-406e000000000000_40e5555555555555_3f76800000000000
-c07ffff583a53b8e_40e5555555555555_bf87fff822bbecab
-408199999999999a_40e5555555555555_3f9a666666666667
-c093333333333333_40e5555555555555_bfaccccccccccccd
-40a028f5c28f5c29_40e5555555555555_3fb83d70a3d70a3e
-c0b004189374bc6a_40e5555555555555_bfc80624dd2f1a9f
-40c00068db8bac71_40e5555555555555_3fd8009d495182aa
-c0dd1745d1745d17_40e5555555555555_bfe5d1745d1745d2
-40e5555555555555_40e5555555555555_3ff0000000000000
-c0f999999999999a_40e5555555555555_c003333333333334
-410c71c71c71c71c_40e5555555555555_4015555555555555
-4000000000000000_c0f999999999999a_bf04000000000000
-c018000000000000_c0f999999999999a_3f1e000000000000
-4024000000000000_c0f999999999999a_bf29000000000000
-c032000000000000_c0f999999999999a_3f36800000000000
-4041000000000000_c0f999999999999a_bf45400000000000
-c05c000000000000_c0f999999999999a_3f51800000000000
-406e000000000000_c0f999999999999a_bf62c00000000000
-c07ffff583a53b8e_c0f999999999999a_3f73fff972474538
-408199999999999a_c0f999999999999a_bf86000000000000
-c093333333333333_c0f999999999999a_3f97ffffffffffff
-40a028f5c28f5c29_c0f999999999999a_bfa4333333333333
-c0b004189374bc6a_c0f999999999999a_3fb4051eb851eb84
-40c00068db8bac71_c0f999999999999a_bfc40083126e978d
-c0dd1745d1745d17_c0f999999999999a_3fd22e8ba2e8ba2e
-40e5555555555555_c0f999999999999a_bfeaaaaaaaaaaaaa
-c0f999999999999a_c0f999999999999a_3ff0000000000000
-410c71c71c71c71c_c0f999999999999a_c001c71c71c71c71
-4000000000000000_410c71c71c71c71c_3ef2000000000000
-c018000000000000_410c71c71c71c71c_bf0b000000000000
-4024000000000000_410c71c71c71c71c_3f16800000000000
-c032000000000000_410c71c71c71c71c_bf24400000000000
-4041000000000000_410c71c71c71c71c_3f33200000000000
-c05c000000000000_410c71c71c71c71c_bf4f800000000000
-406e000000000000_410c71c71c71c71c_3f50e00000000000
-c07ffff583a53b8e_410c71c71c71c71c_bf61fffa1a0cf180
-408199999999999a_410c71c71c71c71c_3f73ccccccccccce
-c093333333333333_410c71c71c71c71c_bf8599999999999a
-40a028f5c28f5c29_410c71c71c71c71c_3f922e147ae147ae
-c0b004189374bc6a_410c71c71c71c71c_bfa2049ba5e353f8
-40c00068db8bac71_410c71c71c71c71c_3fb20075f6fd21ff
-c0dd1745d1745d17_410c71c71c71c71c_bfc05d1745d1745d
-40e5555555555555_410c71c71c71c71c_3fd8000000000000
-c0f999999999999a_410c71c71c71c71c_bfecccccccccccce
-410c71c71c71c71c_410c71c71c71c71c_3ff0000000000000
--- a/pipelined/testbench/testbench-coremark_bare.sv
+++ b/pipelined/testbench/testbench-coremark_bare.sv
@ -1,145 +0,0 @@
-///////////////////////////////////////////
-// testbench-imperas.sv
-//
-// Written: David_Harris@hmc.edu 9 January 2021
-// Modified: 
-//
-// Purpose: Wally Testbench and helper modules
-//          Applies test programs from the Imperas suite
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// MIT LICENSE
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
-// software and associated documentation files (the "Software"), to deal in the Software 
-// without restriction, including without limitation the rights to use, copy, modify, merge, 
-// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
-// to whom the Software is furnished to do so, subject to the following conditions:
-//
-//   The above copyright notice and this permission notice shall be included in all copies or 
-//   substantial portions of the Software.
-//
-//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
-//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
-//   OR OTHER DEALINGS IN THE SOFTWARE.
-////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"
-module testbench();
-  logic        clk;
-  logic        reset, reset_ext;
-  int test, i, errors, totalerrors;
-  logic [31:0] sig32[10000:0];
-  logic [`XLEN-1:0] signature[10000:0];
-  logic [`XLEN-1:0] testadr;
-  string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
-  logic [`XLEN-1:0] meminit;
-  string tests[];
-  logic [`AHBW-1:0] HRDATAEXT;
-  logic             HREADYEXT, HRESPEXT;
-  logic [31:0]      HADDR;
-  logic [`AHBW-1:0] HWDATA;
-  logic             HWRITE;
-  logic [2:0]       HSIZE;
-  logic [2:0]       HBURST;
-  logic [3:0]       HPROT;
-  logic [1:0]       HTRANS;
-  logic             HMASTLOCK;
-  logic             HCLK, HRESETn;
-  
-  // pick tests based on modes supported
-  initial 
-//  tests = {"../../tests/imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremarkcodemod.bare.riscv.memfile", "1000"};
-  tests = {"../../benchmarks/riscv-coremark/work/coremark.bare.riscv.memfile", "1000"};
-  string signame, memfilename;
-  logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
-  logic UARTSin, UARTSout;
-  logic SDCCLK;
-  logic      SDCCmdIn;
-  logic      SDCCmdOut;
-  logic      SDCCmdOE;
-  logic [3:0] SDCDatIn;
-
-  logic             HREADY;
-  logic 	    HSELEXT;
-
-  assign SDCmd = 1'bz;
-  assign SDCDat = 4'bz;
-  
-
-  // instantiate device to be tested
-  assign GPIOPinsIn = 0;
-  assign UARTSin = 1;
-  assign HREADYEXT = 1;
-  assign HRESPEXT = 0;
-  assign HRDATAEXT = 0;
-  wallypipelinedsoc dut(.clk, .reset_ext, .reset(), .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
-                        .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
-                        .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
-                        .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); 
-
-  logic [31:0] InstrW;
-  flopenr  #(32)   InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW,  dut.core.ifu.InstrM, InstrW);
-
-  // Track names of instructions
-    instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
-                dut.core.ifu.FinalInstrRawF,
-                dut.core.ifu.InstrD, dut.core.ifu.InstrE,
-                dut.core.ifu.InstrM,  InstrW,
-                InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
-/*
-  instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
-                dut.core.ifu.icache.controller.FinalInstrRawF,
-                dut.core.ifu.InstrD, dut.core.ifu.InstrE,
-                dut.core.ifu.InstrM, InstrW,
-                InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
-*/
-  logic [`XLEN-1:0] PCW;
-  flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.core.ifu.PCM, PCW);
-  
-  // initialize tests
-  integer j;
-  initial
-    begin
-      totalerrors = 0;
-      // read test vectors into memory
-      memfilename = tests[0];
-      $readmemh(memfilename, dut.uncore.ram.ram.RAM);
-      //for(j=268437955; j < 268566528; j = j+1)
-        //dut.uncore.ram.RAM[j] = 64'b0;
-//      ProgramAddrMapFile = "../../imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremark.RV64IM.bare.elf.objdump.addr";
-//      ProgramAddrMapFile = "../../imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremark.RV64IM.bare.elf.objdump.lab";
-        //dut.uncore.ram.RAM[268437713]=64'b1;
-    reset_ext = 1; # 22; reset_ext = 0;
-    end
-  // generate clock to sequence tests
-  always
-    begin
-      clk = 1; # 5; clk = 0; # 5;
-    end
-  always @(negedge clk)
-    begin
-      if (dut.core.priv.priv.ecallM) begin
-        #20;
-        $display("Code ended with ebreakM");
-        $stop;
-      end
-    end
-
-  initial begin
-//    $readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory);
-//    $readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.memory);
-    $readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
-	  $readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem);    
-
-  end
-
-
-  
-endmodule
-/* verilator lint_on STMTDLY */
-/* verilator lint_on WIDTH */
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -10,120 +10,64 @@ module testbenchfp;
  parameter TEST="none";

  string      Tests[];        // list of tests to be run
-  string      FmaRneTests[];  // list of FMA round to nearest even tests to run
-  string      FmaRuTests[];   // list of FMA round up tests to run
-  string      FmaRdTests[];   // list of FMA round down tests to run
-  string      FmaRzTests[];   // list of FMA round twords zero
-  string      FmaRnmTests[];  // list of FMA round to nearest max magnitude
  logic [2:0] OpCtrl[];       // list of op controls
  logic [2:0] Unit[];         // list of units being tested
  logic WriteInt[];           // Is being written to integer resgiter
  logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
  logic [1:0] Fmt[];          // list of formats for the other units
-  logic [1:0] FmaFmt[];       // list of formats for the FMA
  

  logic               clk=0;
  logic [31:0]        TestNum=0;    // index for the test
-  logic [31:0]        FmaTestNum=0;    // index for the test
  logic [31:0]        OpCtrlNum=0;  // index for OpCtrl
  logic [31:0]        errors=0;     // how many errors
  logic [31:0]        VectorNum=0;  // index for test vector
-  logic [31:0]        FmaVectorNum=0;  // index for test vector
  logic [31:0]        FrmNum=0;     // index for rounding mode
-  logic [`FLEN*4+7:0] TestVectors[46464:0];     // list of test vectors
-  logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; // list of fma rne test vectors
-  logic [`FLEN*4+7:0] FmaRuVectors[6133248:0];  // list of fma ru test vectors
-  logic [`FLEN*4+7:0] FmaRdVectors[6133248:0];  // list of fma rd test vectors
-  logic [`FLEN*4+7:0] FmaRzVectors[6133248:0];  // list of fma rz test vectors
-  logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; // list of fma rnm test vectors
+  logic [`FLEN*4+7:0] TestVectors[6133248:0];     // list of test vectors

-  logic [1:0]           FmaFmtVal, FmtVal;          // value of the current Fmt
+  logic [1:0]           FmtVal;          // value of the current Fmt
  logic [2:0]           UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
  logic                 WriteIntVal;                // value of the current WriteInt
  logic [`FLEN-1:0]     X, Y, Z;                    // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRneX, FmaRneY, FmaRneZ;  // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRzX, FmaRzY, FmaRzZ;     // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRuX, FmaRuY, FmaRuZ;     // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRdX, FmaRdY, FmaRdZ;     // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRnmX, FmaRnmY, FmaRnmZ;  // inputs read from TestFloat
  logic [`XLEN-1:0]     SrcA;                       // integer input
  logic [`FLEN-1:0]	    Ans;                        // correct answer from TestFloat
-  logic [`FLEN-1:0]     FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat
  logic [`FLEN-1:0]	    Res;                                                // result from other units
-  logic [`FLEN-1:0]	    FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // results from FMA
  logic [4:0]	 	        AnsFlg;                                             // correct flags read from testfloat
-  logic [4:0]           FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
-  logic [4:0]	 	        ResFlg;                                                            // Result flags
-  logic [4:0]           FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
-  logic	[`FMTBITS-1:0]  ModFmt, FmaModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
-  logic [`FLEN-1:0]     FmaRes, DivRes, CmpRes, CvtRes;  // Results from each unit
-  logic [`XLEN-1:0]     CvtIntRes;  // Results from each unit
+  logic [4:0]	 	        ResFlg, Flg;                                                            // Result flags
+  logic	[`FMTBITS-1:0]  ModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
+  logic [`FLEN-1:0]     FpRes, FpCmpRes;  // Results from each unit
+  logic [`XLEN-1:0]     IntRes, CmpRes;  // Results from each unit
  logic [4:0]           FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
-  logic                 ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN;   // is the outputed result NaN
-  logic                 AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN;   // is the correct answer NaN
-  logic                 NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
+  logic                 AnsNaN, ResNaN, NaNGood;
  logic                 XSgn, YSgn, ZSgn;                     // sign of the inputs
-  logic                 FmaRneXSgn, FmaRneYSgn, FmaRneZSgn;
-  logic                 FmaRzXSgn, FmaRzYSgn, FmaRzZSgn;
-  logic                 FmaRuXSgn, FmaRuYSgn, FmaRuZSgn;
-  logic                 FmaRdXSgn, FmaRdYSgn, FmaRdZSgn;
-  logic                 FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn;
  logic [`NE-1:0]       XExp, YExp, ZExp;                     // exponent of the inputs
-  logic [`NE-1:0]       FmaRneXExp, FmaRneYExp, FmaRneZExp;
-  logic [`NE-1:0]       FmaRzXExp, FmaRzYExp, FmaRzZExp;
-  logic [`NE-1:0]       FmaRuXExp, FmaRuYExp, FmaRuZExp;
-  logic [`NE-1:0]       FmaRdXExp, FmaRdYExp, FmaRdZExp;
-  logic [`NE-1:0]       FmaRnmXExp, FmaRnmYExp, FmaRnmZExp;
  logic [`NF:0]         XMan, YMan, ZMan;                     // mantissas of the inputs
-  logic [`NF:0]         FmaRneXMan, FmaRneYMan, FmaRneZMan;
-  logic [`NF:0]         FmaRzXMan, FmaRzYMan, FmaRzZMan;
-  logic [`NF:0]         FmaRuXMan, FmaRuYMan, FmaRuZMan;
-  logic [`NF:0]         FmaRdXMan, FmaRdYMan, FmaRdZMan;
-  logic [`NF:0]         FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
  logic                 XNaN, YNaN, ZNaN;                     // is the input NaN
-  logic                 FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
-  logic                 FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
-  logic                 FmaRuXNaN, FmaRuYNaN, FmaRuZNaN;
-  logic                 FmaRdXNaN, FmaRdYNaN, FmaRdZNaN;
-  logic                 FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN;
  logic                 XSNaN, YSNaN, ZSNaN;                  // is the input a signaling NaN
-  logic                 FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN;
-  logic                 FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN;
-  logic                 FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
-  logic                 FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
-  logic                 FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
  logic                 XDenorm, ZDenorm;            // is the input denormalized
-  logic                 FmaRneXDenorm, FmaRneZDenorm;
-  logic                 FmaRzXDenorm, FmaRzZDenorm;
-  logic                 FmaRuXDenorm, FmaRuZDenorm;
-  logic                 FmaRdXDenorm, FmaRdZDenorm;
-  logic                 FmaRnmXDenorm, FmaRnmZDenorm;
  logic                 XInf, YInf, ZInf;                   // is the input infinity
-  logic                 FmaRneXInf, FmaRneYInf, FmaRneZInf;
-  logic                 FmaRzXInf, FmaRzYInf, FmaRzZInf;
-  logic                 FmaRuXInf, FmaRuYInf, FmaRuZInf;
-  logic                 FmaRdXInf, FmaRdYInf, FmaRdZInf;
-  logic                 FmaRnmXInf, FmaRnmYInf, FmaRnmZInf;
  logic                 XZero, YZero, ZZero;                // is the input zero
-  logic                 FmaRneXZero, FmaRneYZero, FmaRneZZero;
-  logic                 FmaRzXZero, FmaRzYZero, FmaRzZZero;
-  logic                 FmaRuXZero, FmaRuYZero, FmaRuZZero;
-  logic                 FmaRdXZero, FmaRdYZero, FmaRdZZero;
-  logic                 FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
  logic                 XExpMax, YExpMax, ZExpMax;         // is the input's exponent all ones  
+  logic  [`LGLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
+  logic        IntZeroE; // wired between fcvt (.IntZeroE) and postprocess (.IntZeroM)
+  logic CvtResSgnE; // wired between fcvt (.CvtResSgnE) and postprocess (.CvtResSgnM)
+  logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5; // NOTE(review): not referenced in the visible part of this change - confirm these are still needed
+  logic [`NE:0]           CvtCalcExpE;    // the calculated exponent
+	logic [`LOGLGLEN-1:0] CvtShiftAmtE;  // how much to shift by
+  logic CvtResDenormUfE; // wired between fcvt (.CvtResDenormUfE) and postprocess (.CvtResDenormUfM)
+  

  // in-between FMA signals
  logic                 Mult;
-  logic [`NE+1:0]	      ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp;
-  logic 				        AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky;
-  logic 					      KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd; 
-  logic [$clog2(3*`NF+7)-1:0]	NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt;
-  logic [3*`NF+5:0]	    SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum;       
-  logic 			          InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ;
-  logic 			          NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum;
-  logic 			          ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff;
-  logic 			          PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn;
+  logic [`NE+1:0]	      ProdExpE;
+  logic 				        AddendStickyE;
+  logic 					      KillProdE; 
+  logic [$clog2(3*`NF+7)-1:0]	FmaNormCntE;
+  logic [3*`NF+5:0]	    SumE;       
+  logic 			          InvZE;
+  logic 			          NegSumE;
+  logic 			          ZSgnEffE;
+  logic 			          PSgnE;


  ///////////////////////////////////////////////////////////////////////////////////////////////
@ -282,15 +226,13 @@ module testbenchfp;
      //     end
      // end
      if (TEST === "fma"   | TEST === "all") begin  // if fused-mutliply-add is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f128_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f128_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f128_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
-        // add the format for the Fma
-        FmaFmt = {FmaFmt, 2'b11};
+        Tests = {Tests, f128fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b11};
+        end
      end
    end
    if (`D_SUPPORTED) begin // if double precision is supported
@ -411,14 +353,13 @@ module testbenchfp;
      //   end
      // end
      if (TEST === "fma"   | TEST === "all") begin // if the fused multiply add is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f64_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f64_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f64_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
-        FmaFmt = {FmaFmt, 2'b01};
+        Tests = {Tests, f64fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b01};
+        end
      end
    end
    if (`F_SUPPORTED) begin // if single precision being supported
@ -523,14 +464,13 @@ module testbenchfp;
      //   end
      // end
      if (TEST === "fma"   | TEST === "all")  begin // if fma is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f32_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f32_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f32_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
-        FmaFmt = {FmaFmt, 2'b00};
+        Tests = {Tests, f32fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b00};
+        end
      end
    end
    if (`ZFH_SUPPORTED) begin // if half precision supported
@ -617,19 +557,18 @@ module testbenchfp;
      //   end
      // end
      if (TEST === "fma"   | TEST === "all") begin // if fma is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f16_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f16_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f16_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
-        FmaFmt = {FmaFmt, 2'b10};
+        Tests = {Tests, f16fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b10};
+        end
      end
    end

    // check if nothing is being tested
-    if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
+    if (Tests.size() == 0) begin
      $display("TEST %s not supported in this configuration", TEST);
      $stop;
    end
@ -648,26 +587,17 @@ module testbenchfp;
  // Read the first test
  initial begin
    $display("\n\nRunning %s vectors", Tests[TestNum]);
-    $display("Running FMA precision %d", FmaTestNum);
    $readmemh({`PATH, Tests[TestNum]}, TestVectors);
-    $readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
-    $readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
-    $readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
-    $readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
-    $readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
    // set the test index to 0
    TestNum = 0;
-    FmaTestNum = 0;
  end

  // set a the signals for all tests
-  always_comb FmaFmtVal = FmaFmt[FmaTestNum];
  always_comb UnitVal = Unit[TestNum];
  always_comb FmtVal = Fmt[TestNum];
  always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
  always_comb WriteIntVal = WriteInt[OpCtrlNum];
  always_comb FrmVal = Frm[FrmNum];
-  assign Mult = OpCtrlVal === 3'b100;

  // modify the format signal if only 2 percisions supported
  //    - 1 for the larger precision
@ -675,61 +605,9 @@ module testbenchfp;
  always_comb begin
    if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
    else ModFmt = FmtVal;
-    if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
-    else FmaModFmt = FmaFmtVal;
  end

  // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
-  readfmavectors readfmarnevectors (.clk, .TestVector(FmaRneVectors[FmaVectorNum]), .Ans(FmaRneAns), .AnsFlg(FmaRneAnsFlg), 
-                                    .XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
-                                    .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), 
-                                    .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), 
-                                    .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
-                                    .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), 
-                                    .XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm), 
-                                    .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
-                                    .XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
-                                    .X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
-  readfmavectors readfmarzvectors (.clk, .TestVector(FmaRzVectors[FmaVectorNum]), .Ans(FmaRzAns), .AnsFlg(FmaRzAnsFlg), 
-                                    .XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
-                                    .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), 
-                                    .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), 
-                                    .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
-                                    .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), 
-                                    .XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm), 
-                                    .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
-                                    .XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
-  readfmavectors readfmaruvectors (.clk, .TestVector(FmaRuVectors[FmaVectorNum]), .Ans(FmaRuAns), .AnsFlg(FmaRuAnsFlg), 
-                                    .XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
-                                    .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), 
-                                    .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), 
-                                    .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
-                                    .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), 
-                                    .XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm), 
-                                    .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
-                                    .XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
-  readfmavectors readfmardvectors (.clk, .TestVector(FmaRdVectors[FmaVectorNum]), .Ans(FmaRdAns), .AnsFlg(FmaRdAnsFlg), 
-                                    .XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
-                                    .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), 
-                                    .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
-                                    .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
-                                    .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), 
-                                    .XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm), 
-                                    .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
-                                    .XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
-  readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[FmaVectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg), 
-                                    .XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
-                                    .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), 
-                                    .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
-                                    .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
-                                    .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), 
-                                    .XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm), 
-                                    .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
-                                    .XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                    .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
                                    .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
@ -754,124 +632,30 @@ module testbenchfp;
  ///////////////////////////////////////////////////////////////////////////////////////////////

  // instantiate devices under test
-  //    - one fma for each precison
-  //    - all the units for the other tests (including fma for add/sub/mul)
-  fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn), 
-              .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), 
-              .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
-              .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ), 
-              .NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
-              .ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd)); 
-  fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn), 
-              .ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
-              .XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan), 
-              .XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN), 
-              .XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero), 
-              .XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf), 
-              .XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN), 
-              .KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp), 
-              .SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff), 
-              .PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE), 
-              .FMAFlgM(FmaRneResFlg), .FMAResM(FmaRneRes), .Mult(1'b0));
-  fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), 
-              .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), 
-              .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
-              .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ), 
-              .NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
-              .ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd)); 
-  fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn), 
-              .ZExpM(FmaRzZExp),  .ZDenormM(FmaRzZDenorm),
-              .XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan), 
-              .XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN), 
-              .XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero), 
-              .XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf), 
-              .XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN), 
-              .KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp), 
-              .SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff), 
-              .PSgnM(FmaRzPSgn), .FmtM(FmaModFmt), .FrmM(`RZ), 
-              .FMAFlgM(FmaRzResFlg), .FMAResM(FmaRzRes), .Mult(1'b0));
-  fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), 
-              .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), 
-              .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
-              .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ), 
-              .NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
-              .ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd)); 
-  fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn), 
-              .ZExpM(FmaRuZExp),  .ZDenormM(FmaRuZDenorm),
-              .XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan), 
-              .XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN), 
-              .XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero), 
-              .XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf), 
-              .XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN), 
-              .KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp), 
-              .SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff), 
-              .PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU), 
-              .FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
-  fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), 
-              .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), 
-              .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
-              .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ), 
-              .NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
-              .ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd)); 
-  fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn), 
-              .ZExpM(FmaRdZExp),  .ZDenormM(FmaRdZDenorm),
-              .XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan), 
-              .XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN), 
-              .XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero), 
-              .XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf), 
-              .XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN), 
-              .KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp), 
-              .SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff), 
-              .PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD), 
-              .FMAFlgM(FmaRdResFlg), .FMAResM(FmaRdRes), .Mult(1'b0));
-  fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), 
-              .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), 
-              .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
-              .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ), 
-              .NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
-              .ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd)); 
-  fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn), 
-              .ZExpM(FmaRnmZExp),  .ZDenormM(FmaRnmZDenorm),
-              .XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan), 
-              .XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN), 
-              .XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero), 
-              .XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf), 
-              .XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN), 
-              .KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp), 
-              .SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt), .ZSgnEffM(FmaRnmZSgnEff), 
-              .PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM), 
-              .FMAFlgM(FmaRnmResFlg), .FMAResM(FmaRnmRes), .Mult(1'b0));  
-  fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), 
+  fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), 
              .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), 
              .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
              .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
-              .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
+              .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
              .ProdExpE, .AddendStickyE, .KillProdE); 
-  fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn), 
-              .ZExpM(ZExp),  .ZDenormM(ZDenorm),
-              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), 
-              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), 
-              .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), 
-              .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), 
-              .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), 
+              
+  postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
+              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
+              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
+              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
+              .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
+              .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
+              .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
              .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), 
-              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
-              .FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
-  // fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf), 
-  //             .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
+              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
+              .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
  
 fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
-            .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
-            .XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), 
-            .CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
+            .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
+            .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
  fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
-              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), 
-              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
+              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
+              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
  // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), 
  //                 .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
  //                 .CvtRes, .CvtFlgE);
@ -900,60 +684,6 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
 ///////////////////////////////////////////////////////////////////////////////////////////////

  // Check whether the correct answer and the result are NaNs
-  always_comb begin
-    case (FmaFmtVal)
-        4'b11: begin // quad             
-          FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]);
-        end
-        4'b01: begin // double                 
-          FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]);
-        end
-        4'b00: begin // single
-          FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]);
-        end
-        4'b10: begin // half
-          FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]);
-        end
-    endcase
-  end
-
-
  always_comb begin
    if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
      // an integer output can't be a NaN
@ -1004,20 +734,20 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
 always_comb begin
    // select the result to check
    case (UnitVal)
-      `FMAUNIT: Res = FmaRes;
-      `DIVUNIT: Res = DivRes;
+      `FMAUNIT: Res = FpRes;
+      `DIVUNIT: Res = FpRes;
      `CMPUNIT: Res = CmpRes;
-      `CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
-      `CVTFPUNIT: Res = CvtRes;
+      `CVTINTUNIT: if(WriteIntVal) Res = IntRes; else Res = FpRes;
+      `CVTFPUNIT: Res = FpRes;
    endcase

    // select the flag to check
    case (UnitVal)
-      `FMAUNIT: ResFlg = FmaFlg;
-      `DIVUNIT: ResFlg = DivFlg;
+      `FMAUNIT: ResFlg = Flg;
+      `DIVUNIT: ResFlg = Flg;
      `CMPUNIT: ResFlg = CmpFlg;
-      `CVTINTUNIT: ResFlg = CvtFlg;
-      `CVTFPUNIT: ResFlg = CvtFlg;
+      `CVTINTUNIT: ResFlg = Flg;
+      `CVTFPUNIT: ResFlg = Flg;
    endcase
 end
  // check results on falling edge of clk
@ -1027,117 +757,6 @@ end
    // check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
    //    - the sign of the NaN does not matter for the operations being tested
    //    - when 2 or more NaNs are input, the NaN that is propagated doesn't matter
-    case (FmaFmtVal)
-      4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
-      4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
-      4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
-      4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
-      4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
-      4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
-      4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
-      4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
-      4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
-      4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
-      4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
-      4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
-      4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
-      4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
-      4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
-      4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]})));
-    endcase
    if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
      case (FmtVal)
        4'b11: NaNGood =  (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
@ -1221,77 +840,8 @@ end
      $stop;
    end

-    // check if the fma tests are correct
-    if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx)  & (FmaRneResFlg === FmaRneAnsFlg | FmaRneAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RNE");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlg, FmaRneAns, FmaRneAnsFlg);
-      $stop;
-    end
-    if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlg === FmaRzAnsFlg | FmaRzAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RZ");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlg, FmaRzAns, FmaRzAnsFlg);
-      $stop;
-    end
-    if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlg === FmaRuAnsFlg | FmaRuAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RU");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlg, FmaRuAns, FmaRuAnsFlg);
-      $stop;
-    end
-    if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlg === FmaRdAnsFlg | FmaRdAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RD");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlg, FmaRdAns, FmaRdAnsFlg);
-      $stop;
-    end
-    if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlg === FmaRnmAnsFlg | FmaRnmAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RNM");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlg, FmaRnmAns, FmaRnmAnsFlg);
-      $stop;
-    end

    VectorNum += 1; // increment the vector
-    FmaVectorNum += 1; // increment the vector
-
-    // check to see if there more vectors in this test
-    // *** fix this so that fma and other run sepratly - re-add fma num
-    if ((FmaRneVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRzVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRuVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRdVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRnmVectors[FmaVectorNum][0] === 1'bx & FmaRneTests[FmaTestNum] !== "" )) begin // if reached the end of file
-
-      // increment the test
-      FmaTestNum += 1;
-
-      // clear the vectors
-      for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
-      // read next files
-      $readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
-      $readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
-      $readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
-      $readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
-      $readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
-
-      // set the vector index back to 0
-      FmaVectorNum = 0;
-
-      // if no more Tests - finish
-      if(Tests[TestNum] === "" & 
-        FmaRneTests[FmaTestNum] === "" & 
-        FmaRzTests[FmaTestNum] === "" & 
-        FmaRuTests[FmaTestNum] === "" & 
-        FmaRdTests[FmaTestNum] === "" & 
-        FmaRnmTests[FmaTestNum] === "") begin
-        $display("\nAll Tests completed with %d errors\n", errors);
-        $stop;
-      end 
-
-      $display("Running FMA precision %d", FmaTestNum);
-    end

    if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file

@ -1299,14 +849,9 @@ end
      TestNum += 1;

      // clear the vectors
-      for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
+      for(int i=0; i<6133248; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
      // read next files
      $readmemh({`PATH, Tests[TestNum]}, TestVectors);
-      $readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
-      $readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
-      $readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
-      $readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
-      $readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);

      // set the vector index back to 0
      VectorNum = 0;
@ -1317,12 +862,7 @@ end
      else FrmNum = 0; 

      // if no more Tests - finish
-      if(Tests[TestNum] === "" & 
-        FmaRneTests[FmaTestNum] === "" & 
-        FmaRzTests[FmaTestNum] === "" & 
-        FmaRuTests[FmaTestNum] === "" & 
-        FmaRdTests[FmaTestNum] === "" & 
-        FmaRnmTests[FmaTestNum] === "") begin
+      if(Tests[TestNum] === "") begin
        $display("\nAll Tests completed with %d errors\n", errors);
        $stop;
      end 
@ -1335,89 +875,6 @@ endmodule



-
-
-
-
-
-
-
-
-
-module readfmavectors (
-  input logic                 clk,
-  input logic [`FMTBITS-1:0]  FmaModFmt,              // the modified format
-  input logic [1:0]           FmaFmt,                 // the format of the FMA inputs
-  input logic [`FLEN*4+7:0]   TestVector,             // the test vector
-  output logic [`FLEN-1:0]    Ans,                    // the correct answer
-  output logic [4:0]          AnsFlg,                 // the correct flag
-  output logic                XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
-  output logic [`NE-1:0]      XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
-  output logic [`NF:0]        XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
-  output logic                XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
-  output logic                XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                XDenormE, ZDenormE,   // is XYZ denormalized
-  output logic                XZeroE, YZeroE, ZZeroE,         // is XYZ zero
-  output logic                XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic [`FLEN-1:0]    X, Y, Z                 // inputs
-);
-
-  logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
-  // apply test vectors on rising edge of clk
-  // Format of vectors Inputs(1/2/3)_AnsFlg
-  always @(posedge clk) begin
-    #1; 
-    AnsFlg = TestVector[4:0];
-    case (FmaFmt)
-      2'b11: begin       // quad
-        X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
-        Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
-        Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
-        Ans = TestVector[8+(`Q_LEN-1):8];
-      end
-      2'b01:	begin	  // double
-          X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
-          Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
-          Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
-          Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
-      end
-      2'b00:	begin	  // single
-          X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
-          Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
-          Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
-          Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
-      end
-      2'b10:	begin	  // half
-          X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
-          Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
-          Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
-          Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
-      end
-    endcase
-  end
-  
-  unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
-                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
-                .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
-                .XExpMaxE, .ZDenormE);
-endmodule
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 module readvectors (
  input logic clk,
  input logic [`FLEN*4+7:0] TestVector,
@ -1451,33 +908,61 @@ module readvectors (
      `FMAUNIT:
        case (Fmt)
          2'b11: begin       // quad
-            X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
-            if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
+              Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
+              Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
+            end
+            else begin
+              X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
+              if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
+            end
            Ans = TestVector[8+(`Q_LEN-1):8];
          end
          2'b01:	begin	  // double
-            X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
-            if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; 
-            else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}; 
-            else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
+              Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
+              Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
+            end
+            else begin
+              X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
+              if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; 
+              else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}; 
+              else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
+            end
            Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
          end
          2'b00:	begin	  // single
-            X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
-            if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; 
-            else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}; 
-            else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
+              Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
+              Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
+            end
+            else begin
+              X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
+              if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; 
+              else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}; 
+              else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
+            end
            Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
          end
          2'b10:	begin	  // half
-            X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
-            if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; 
-            else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}; 
-            else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
+              Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
+              Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
+            end
+            else begin
+              X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
+              if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; 
+              else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}; 
+              else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
+            end
            Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
          end
        endcase
@ -1532,19 +1017,19 @@ module readvectors (
          2'b11: begin       // quad
          case (OpCtrl[1:0])
            2'b11: begin       // quad
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
+              X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
              Ans = TestVector[8+(`Q_LEN-1):8];
            end
            2'b01:	begin	  // double
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
+              X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
              Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
            end
            2'b00:	begin	  // single
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
+              X = {TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
              Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
            end
            2'b10:	begin	  // half
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
+              X = {TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
              Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
            end
          endcase
@ -1628,12 +1113,12 @@ module readvectors (
                Ans = TestVector[8+(`Q_LEN-1):8];
              end
              2'b01:	begin	  // quad -> long
-                X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
+                X = {TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
                SrcA = {`XLEN{1'bx}};
                Ans = {TestVector[8+(`XLEN-1):8]};
              end
              2'b00:	begin	  // quad -> int
-                X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
+                X = {TestVector[8+32+`Q_LEN-1:8+(32)]};
                SrcA = {`XLEN{1'bx}};
                Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
              end
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@ -396,6 +396,7 @@ module riscvassertions;
    assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
    assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
    assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
+    assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unless data cache is supported"); // FLEN wider than XLEN is only accepted when DMEM is configured as a cache
    assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
    assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
    assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
@ -418,6 +419,7 @@ module riscvassertions;
    //assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
    //assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");    
    assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
+    assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); // NOTE(review): condition only checks that the line length in bits is a multiple of 4 and is not gated on DMEM being a cache; the message describes a words-per-line restriction - confirm the intended check
  end
 endmodule

--- a/pipelined/testbench/tests-fp.vh
+++ b/pipelined/testbench/tests-fp.vh
@ -2,7 +2,7 @@
 `define ADD_OPCTRL 3'b110
 `define MUL_OPCTRL 3'b100
 `define SUB_OPCTRL 3'b111
-`define FADD_OPCTRL 3'b000
+`define FMA_OPCTRL 3'b000
 `define DIV_OPCTRL 3'b000
 `define SQRT_OPCTRL 3'b001
 `define LE_OPCTRL 3'b011
@ -21,11 +21,11 @@
 `define RU  3'b011
 `define RD  3'b010
 `define RNM 3'b100
-`define FMAUNIT 0
+`define FMAUNIT 2
 `define DIVUNIT 1
-`define CVTINTUNIT 2
-`define CVTFPUNIT 3
-`define CMPUNIT 4
+`define CVTINTUNIT 0
+`define CVTFPUNIT 4
+`define CMPUNIT 3

 string f16rv32cvtint[] = '{
 	"ui32_to_f16_rne.tv",
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@ -40,9 +40,6 @@ string tvpaths[] = '{
    "../../addins/embench-iot/bd_speed/src/"
 };

-
-
-   // *** make sure these are somewhere
  string coremark[] = '{
    `COREMARK,
    "coremark.bare.riscv"
@ -1105,11 +1102,11 @@ string imperas32f[] = '{
    // "rv64i_m/D/d_fdiv_b20-01", // looks like flags
    // "rv64i_m/D/d_fdiv_b2-01", // also flags
    // "rv64i_m/D/d_fdiv_b21-01", // positive NaNs again
-    "rv64i_m/D/d_fdiv_b3-01",
+    // "rv64i_m/D/d_fdiv_b3-01",
    // "rv64i_m/D/d_fdiv_b4-01", // flags
-    "rv64i_m/D/d_fdiv_b5-01",
+    // "rv64i_m/D/d_fdiv_b5-01",
    // "rv64i_m/D/d_fdiv_b6-01", // flags
-    "rv64i_m/D/d_fdiv_b7-01",
+    // "rv64i_m/D/d_fdiv_b7-01",
    // "rv64i_m/D/d_fdiv_b8-01", // flags
    // "rv64i_m/D/d_fdiv_b9-01",  might be a flag too
    "rv64i_m/D/d_feq_b1-01",
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@ -332,7 +332,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
 redirect -append $filename { echo "\n\n\n//// Critical paths through fma1 ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma1/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma2/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
--- a/tests/fp/create_vectors.sh
+++ b/tests/fp/create_vectors.sh
@ -2,482 +2,482 @@
 BUILD="../../addins/TestFloat-3e/build/Linux-x86_64-GCC"
 OUTPUT="./vectors"
 echo "Creating ui32_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
 echo "Creating ui32_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
 echo "Creating ui32_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
 echo "Creating ui32_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
 echo "Creating ui64_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
 echo "Creating ui64_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
 echo "Creating ui64_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
 echo "Creating ui64_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
 echo "Creating i32_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
 echo "Creating i32_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
 echo "Creating i32_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
 echo "Creating i32_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
 echo "Creating i64_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
 echo "Creating i64_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
 echo "Creating i64_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
 echo "Creating i64_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
 echo "Creating f16_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
 echo "Creating f32_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
 echo "Creating f64_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
 echo "Creating f128_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
 echo "Creating f16_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
 echo "Creating f32_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
 echo "Creating f64_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
 echo "Creating f128_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
 echo "Creating f16_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
 echo "Creating f32_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
 echo "Creating f64_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
 echo "Creating f128_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
 echo "Creating f16_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
 echo "Creating f32_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
 echo "Creating f64_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
 echo "Creating f128_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
 echo "Creating f16_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
 echo "Creating f16_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
 echo "Creating f16_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
 echo "Creating f32_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
 echo "Creating f32_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
 echo "Creating f32_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
 echo "Creating f64_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
 echo "Creating f64_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
 echo "Creating f64_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
 echo "Creating f128_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
 echo "Creating f128_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
 echo "Creating f128_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
 echo "Creating f16_add vectors"
-$BUILD/testfloat_gen -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
-$BUILD/testfloat_gen -rminMag f16_add > $OUTPUT/f16_add_rz.tv
-$BUILD/testfloat_gen -rmax f16_add > $OUTPUT/f16_add_ru.tv
-$BUILD/testfloat_gen -rmin f16_add > $OUTPUT/f16_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_add > $OUTPUT/f16_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_add > $OUTPUT/f16_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_add > $OUTPUT/f16_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
 echo "Creating f32_add vectors"
-$BUILD/testfloat_gen -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
-$BUILD/testfloat_gen -rminMag f32_add > $OUTPUT/f32_add_rz.tv
-$BUILD/testfloat_gen -rmax f32_add > $OUTPUT/f32_add_ru.tv
-$BUILD/testfloat_gen -rmin f32_add > $OUTPUT/f32_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_add > $OUTPUT/f32_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_add > $OUTPUT/f32_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_add > $OUTPUT/f32_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
 echo "Creating f64_add vectors"
-$BUILD/testfloat_gen -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
-$BUILD/testfloat_gen -rminMag f64_add > $OUTPUT/f64_add_rz.tv
-$BUILD/testfloat_gen -rmax f64_add > $OUTPUT/f64_add_ru.tv
-$BUILD/testfloat_gen -rmin f64_add > $OUTPUT/f64_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_add > $OUTPUT/f64_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_add > $OUTPUT/f64_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_add > $OUTPUT/f64_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
 echo "Creating f128_add vectors"
-$BUILD/testfloat_gen -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
-$BUILD/testfloat_gen -rminMag f128_add > $OUTPUT/f128_add_rz.tv
-$BUILD/testfloat_gen -rmax f128_add > $OUTPUT/f128_add_ru.tv
-$BUILD/testfloat_gen -rmin f128_add > $OUTPUT/f128_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_add > $OUTPUT/f128_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_add > $OUTPUT/f128_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_add > $OUTPUT/f128_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
 echo "Creating f16_sub vectors"
-$BUILD/testfloat_gen -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
-$BUILD/testfloat_gen -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
-$BUILD/testfloat_gen -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
 echo "Creating f32_sub vectors"
-$BUILD/testfloat_gen -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
-$BUILD/testfloat_gen -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
-$BUILD/testfloat_gen -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
 echo "Creating f64_sub vectors"
-$BUILD/testfloat_gen -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
-$BUILD/testfloat_gen -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
-$BUILD/testfloat_gen -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
 echo "Creating f128_sub vectors"
-$BUILD/testfloat_gen -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
-$BUILD/testfloat_gen -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
-$BUILD/testfloat_gen -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
 echo "Creating f16_mul vectors"
-$BUILD/testfloat_gen -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
-$BUILD/testfloat_gen -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
-$BUILD/testfloat_gen -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
 echo "Creating f32_mul vectors"
-$BUILD/testfloat_gen -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
-$BUILD/testfloat_gen -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
-$BUILD/testfloat_gen -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
 echo "Creating f64_mul vectors"
-$BUILD/testfloat_gen -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
-$BUILD/testfloat_gen -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
-$BUILD/testfloat_gen -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
 echo "Creating f128_mul vectors"
-$BUILD/testfloat_gen -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
-$BUILD/testfloat_gen -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
-$BUILD/testfloat_gen -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
 echo "Creating f16_div vectors"
-$BUILD/testfloat_gen -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
-$BUILD/testfloat_gen -rminMag f16_div > $OUTPUT/f16_div_rz.tv
-$BUILD/testfloat_gen -rmax f16_div > $OUTPUT/f16_div_ru.tv
-$BUILD/testfloat_gen -rmin f16_div > $OUTPUT/f16_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_div > $OUTPUT/f16_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_div > $OUTPUT/f16_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_div > $OUTPUT/f16_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
 echo "Creating f32_div vectors"
-$BUILD/testfloat_gen -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
-$BUILD/testfloat_gen -rminMag f32_div > $OUTPUT/f32_div_rz.tv
-$BUILD/testfloat_gen -rmax f32_div > $OUTPUT/f32_div_ru.tv
-$BUILD/testfloat_gen -rmin f32_div > $OUTPUT/f32_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_div > $OUTPUT/f32_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_div > $OUTPUT/f32_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_div > $OUTPUT/f32_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
 echo "Creating f64_div vectors"
-$BUILD/testfloat_gen -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
-$BUILD/testfloat_gen -rminMag f64_div > $OUTPUT/f64_div_rz.tv
-$BUILD/testfloat_gen -rmax f64_div > $OUTPUT/f64_div_ru.tv
-$BUILD/testfloat_gen -rmin f64_div > $OUTPUT/f64_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_div > $OUTPUT/f64_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_div > $OUTPUT/f64_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_div > $OUTPUT/f64_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
 echo "Creating f128_div vectors"
-$BUILD/testfloat_gen -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
-$BUILD/testfloat_gen -rminMag f128_div > $OUTPUT/f128_div_rz.tv
-$BUILD/testfloat_gen -rmax f128_div > $OUTPUT/f128_div_ru.tv
-$BUILD/testfloat_gen -rmin f128_div > $OUTPUT/f128_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_div > $OUTPUT/f128_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_div > $OUTPUT/f128_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_div > $OUTPUT/f128_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
 echo "Creating f16_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
 echo "Creating f32_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
 echo "Creating f64_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
 echo "Creating f128_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
 echo "Creating f16_eq vectors"
-$BUILD/testfloat_gen -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
-$BUILD/testfloat_gen -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
-$BUILD/testfloat_gen -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
 echo "Creating f32_eq vectors"
-$BUILD/testfloat_gen -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
-$BUILD/testfloat_gen -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
-$BUILD/testfloat_gen -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
 echo "Creating f64_eq vectors"
-$BUILD/testfloat_gen -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
-$BUILD/testfloat_gen -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
-$BUILD/testfloat_gen -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
 echo "Creating f128_eq vectors"
-$BUILD/testfloat_gen -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
-$BUILD/testfloat_gen -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
-$BUILD/testfloat_gen -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
 echo "Creating f16_le vectors"
-$BUILD/testfloat_gen -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
-$BUILD/testfloat_gen -rminMag f16_le > $OUTPUT/f16_le_rz.tv
-$BUILD/testfloat_gen -rmax f16_le > $OUTPUT/f16_le_ru.tv
-$BUILD/testfloat_gen -rmin f16_le > $OUTPUT/f16_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_le > $OUTPUT/f16_le_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_le > $OUTPUT/f16_le_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_le > $OUTPUT/f16_le_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
 echo "Creating f32_le vectors"
-$BUILD/testfloat_gen -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
-$BUILD/testfloat_gen -rminMag f32_le > $OUTPUT/f32_le_rz.tv
-$BUILD/testfloat_gen -rmax f32_le > $OUTPUT/f32_le_ru.tv
-$BUILD/testfloat_gen -rmin f32_le > $OUTPUT/f32_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_le > $OUTPUT/f32_le_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_le > $OUTPUT/f32_le_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_le > $OUTPUT/f32_le_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
 echo "Creating f64_le vectors"
-$BUILD/testfloat_gen -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
-$BUILD/testfloat_gen -rminMag f64_le > $OUTPUT/f64_le_rz.tv
-$BUILD/testfloat_gen -rmax f64_le > $OUTPUT/f64_le_ru.tv
-$BUILD/testfloat_gen -rmin f64_le > $OUTPUT/f64_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_le > $OUTPUT/f64_le_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_le > $OUTPUT/f64_le_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_le > $OUTPUT/f64_le_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
 echo "Creating f128_le vectors"
-$BUILD/testfloat_gen -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
-$BUILD/testfloat_gen -rminMag f128_le > $OUTPUT/f128_le_rz.tv
-$BUILD/testfloat_gen -rmax f128_le > $OUTPUT/f128_le_ru.tv
-$BUILD/testfloat_gen -rmin f128_le > $OUTPUT/f128_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_le > $OUTPUT/f128_le_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_le > $OUTPUT/f128_le_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_le > $OUTPUT/f128_le_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
 echo "Creating f16_lt vectors"
-$BUILD/testfloat_gen -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
-$BUILD/testfloat_gen -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
-$BUILD/testfloat_gen -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
 echo "Creating f32_lt vectors"
-$BUILD/testfloat_gen -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
-$BUILD/testfloat_gen -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
-$BUILD/testfloat_gen -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
 echo "Creating f64_lt vectors"
-$BUILD/testfloat_gen -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
-$BUILD/testfloat_gen -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
-$BUILD/testfloat_gen -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
 echo "Creating f128_lt vectors"
-$BUILD/testfloat_gen -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
-$BUILD/testfloat_gen -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
-$BUILD/testfloat_gen -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
 echo "Creating f16_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
 echo "Creating f32_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
 echo "Creating f64_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
 echo "Creating f128_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
				`@ -0,0 +1 @@`
				`Subproject commit f3e8f2e0941e42961aadcc52750b1b5577c157c9`