Merge pull request #1268 from davidharrishmc/dev

E154 Lab 2 fma16_testgen and odd solutions
2025-02-11 06:05:49 +00:00 · 2025-02-08 16:04:41 -08:00 · 2025-02-08 16:04:41 -08:00 · a1b4ed751a
commit a1b4ed751a
parent 43bc4a452f f909f7d315
23 changed files with 630 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -161,6 +161,12 @@ examples/asm/sumtest/sumtest
 examples/asm/example/example
 examples/asm/trap/trap
 examples/asm/etc/pause
 examples/C/fmul
 examples/exercises/fma16/fma16.sv
 examples/exercises/fma16/fma16_testgen
 examples/exercises/fma16/sol 
 examples/exercises/riscvsoc_solutions
 # Other
 external
--- a/examples/exercises/17p1/17p1.c
+++ b/examples/exercises/17p1/17p1.c
@ -0,0 +1,6 @@
 #include <string.h>
 int main(void) {
    char str[] = "Hello Wally!";
    return strlen(str);
 }
--- a/examples/exercises/17p1/Makefile
+++ b/examples/exercises/17p1/Makefile
@ -0,0 +1,11 @@
 TARGET = 17p1
 # clean is a command, not a file: run it even if a file named "clean" exists
 .PHONY: clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassemble every section of the linked ELF
 $(TARGET).objdump: $(TARGET).elf
 	riscv64-unknown-elf-objdump -D $< > $@
 # Build for RV64GC plus Zbb, without the standard startup files, using the shared linker script.
 # Only one -o is given: the original also passed "-o $(TARGET)", which the later -o overrode.
 $(TARGET).elf: $(TARGET).c Makefile
 	riscv64-unknown-elf-gcc -g -march=rv64gc_zbb -mabi=lp64 -mcmodel=medany \
 	    -nostartfiles -T../../link/link.ld $(TARGET).c -o $@
 clean:
 	rm -f $(TARGET).elf $(TARGET).objdump
--- a/examples/exercises/3p1/3p1.S
+++ b/examples/exercises/3p1/3p1.S
@ -0,0 +1,10 @@
 # Minimal bare-metal program: load two constants, add them, then spin forever.
 .section .text.init
 .globl rvtest_entry_point
 rvtest_entry_point:
    li t0, 0x42         # t0 = 0x42
    li t1, 0xED         # t1 = 0xED
    add t2, t0, t1      # t2 = t0 + t1 = 0x12F
 self_loop:
    j self_loop         # jump to itself: execution never leaves this loop
--- a/examples/exercises/3p1/Makefile
+++ b/examples/exercises/3p1/Makefile
@ -0,0 +1,13 @@
 TARGET = 3p1
 # clean is a command, not a file: run it even if a file named "clean" exists
 .PHONY: clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassemble every section of the linked ELF
 $(TARGET).objdump: $(TARGET).elf
 	riscv64-unknown-elf-objdump -D $< > $@
 # Assemble and link for RV64GC without the standard startup files, using the shared linker script.
 # Only one -o is given: the original also passed "-o $(TARGET)", which the later -o overrode.
 $(TARGET).elf: $(TARGET).S Makefile
 	riscv64-unknown-elf-gcc -g -march=rv64gc -mabi=lp64 -mcmodel=medany \
 	    -nostartfiles -T../../link/link.ld $(TARGET).S -o $@
 clean:
 	rm -f $(TARGET).elf $(TARGET).objdump
--- a/examples/exercises/3p13/3p13.c
+++ b/examples/exercises/3p13/3p13.c
@ -0,0 +1,6 @@
 #include <string.h>
 int main(void) {
    char str[] = "Hello Wally!";
    return strlen(str);
 }
--- a/examples/exercises/3p13/Makefile
+++ b/examples/exercises/3p13/Makefile
@ -0,0 +1,11 @@
 TARGET = 3p13
 # clean is a command, not a file: run it even if a file named "clean" exists
 .PHONY: clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassemble every section of the linked ELF
 $(TARGET).objdump: $(TARGET).elf
 	riscv64-unknown-elf-objdump -D $< > $@
 # Build for RV32GC / ilp32 without the standard startup files, using the shared linker script.
 # Only one -o is given: the original also passed "-o $(TARGET)", which the later -o overrode.
 $(TARGET).elf: $(TARGET).c Makefile
 	riscv64-unknown-elf-gcc -g -march=rv32gc -mabi=ilp32 -mcmodel=medany \
 	    -nostartfiles -T../../link/link.ld $(TARGET).c -o $@
 clean:
 	rm -f $(TARGET).elf $(TARGET).objdump
--- a/examples/exercises/3p15/Makefile
+++ b/examples/exercises/3p15/Makefile
@ -0,0 +1,19 @@
 TARGET = sumtest
 # sim and clean are commands, not files: always run them when asked
 .PHONY: sim clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassemble every section of the linked program
 $(TARGET).objdump: $(TARGET)
 	riscv64-unknown-elf-objdump -D $< > $@
 # Assemble and link the test harness together with sum.S
 $(TARGET): $(TARGET).S sum.S Makefile
 	riscv64-unknown-elf-gcc -g -o $@ -march=rv64gc -mabi=lp64 -mcmodel=medany \
 	    -nostartfiles -T../../link/link.ld $(TARGET).S sum.S
 # Run the program in the simulator and compare its signature with the reference.
 # Depends on $(TARGET) so "make sim" works from a clean tree; a diff mismatch
 # returns non-zero, which stops the recipe before the success message.
 sim: $(TARGET)
 	riscv_sim_RV64 -T $(TARGET).signature.output --signature-granularity 8 $(TARGET)
 	diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output
 	echo "Signature matches! Success!"
 clean:
 	rm -f $(TARGET) $(TARGET).objdump $(TARGET).signature.output
--- a/examples/exercises/3p15/sum.S
+++ b/examples/exercises/3p15/sum.S
@ -0,0 +1,32 @@
 // sum.s
 // David_Harris@hmc.edu 24 December 2021
 // Add up numbers from 1 to N.
 // result in s0, i in s1, N in a0, return answer in a0
 // s0 and s1 are saved on entry and restored before returning,
 // so the caller's values in those registers are preserved.
 //
 // long sum(long N) {
 //   long result, i;
 //   result = 0;
 //   for (i=1; i<=N; i++) result = result + i;
 //   return result;
 // }
 .global sum
 sum:
    addi sp, sp, -16    # make room to save s0 and s1 on the stack
    sd s0, 0(sp)        # save s0 at offset 0 of the frame
    sd s1, 8(sp)        # save s1 at offset 8 of the frame
    li s0, 0            # result = 0
    li s1, 1            # i = 1
 for: bgt s1, a0, done   # exit loop if i > n
    add s0, s0, s1      # result = result + i
    addi s1, s1, 1      # i++
    j for               # repeat
 done:
    mv a0, s0           # put result in a0 to return
    ld s0, 0(sp)        # restore s0 and s1 from stack
    ld s1, 8(sp)
    addi sp, sp, 16     # release the 16-byte frame
    ret                 # return from function
--- a/examples/exercises/3p15/sumtest.S
+++ b/examples/exercises/3p15/sumtest.S
@ -0,0 +1,43 @@
 // sumtest.S
 // David_Harris@hmc.edu 24 December 2021
 // Test harness for sum(): loads N, calls sum(N), stores the result and the
 // instret difference across the call into the signature, then writes 1 to tohost.
 .global rvtest_entry_point
 rvtest_entry_point:
    la sp, topofstack   # Initialize stack pointer
    la t0, N            # get address of N in data
    ld a0, 0(t0)        # load N
    csrr s8, instret    # count instructions before call
    jal sum             # call sum(N)
    csrr s9, instret    # count instructions after call
    sub s9, s9, s8      # length of call
    la t0, begin_signature  # address of signature
    sd a0, 0(t0)        # store sum(N) in signature
    sd s9, 8(t0)        # record performance
 write_tohost:
    la t1, tohost
    li t0, 1            # 1 for success, 3 for failure
    sd t0, 0(t1)        # send success code
 self_loop:
    j self_loop         # wait
 .section .tohost 
 tohost:                 # write to HTIF
    .dword 0
 fromhost:               # second doubleword of the section; never accessed by this program
    .dword 0
 .data
 N:                      # argument passed to sum()
    .dword 6
 .EQU XLEN,64            # used only to size the signature region below
 begin_signature:
    .fill 2*(XLEN/32),4,0xdeadbeef    # 4 words of 4 bytes (room for the two doublewords stored above), preset to 0xdeadbeef
 end_signature:
 # Initialize stack with room for 512 bytes
 .bss
    .space 512
 topofstack:             # label just past the 512 bytes; loaded into sp at entry
--- a/examples/exercises/3p15/sumtest.reference_output
+++ b/examples/exercises/3p15/sumtest.reference_output
@ -0,0 +1,2 @@
 0000000000000015
 0000000000000025
--- a/examples/exercises/3p3/Makefile
+++ b/examples/exercises/3p3/Makefile
@ -0,0 +1,19 @@
 TARGET = sumtest
 # sim and clean are commands, not files: always run them when asked
 .PHONY: sim clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassemble every section of the linked program
 $(TARGET).objdump: $(TARGET)
 	riscv64-unknown-elf-objdump -D $< > $@
 # Assemble and link the test harness together with sum.S
 $(TARGET): $(TARGET).S sum.S Makefile
 	riscv64-unknown-elf-gcc -g -o $@ -march=rv64gc -mabi=lp64 -mcmodel=medany \
 	    -nostartfiles -T../../link/link.ld $(TARGET).S sum.S
 # Run the program under spike and compare its signature with the reference.
 # Depends on $(TARGET) so "make sim" works from a clean tree; a diff mismatch
 # returns non-zero, which stops the recipe before the success message.
 sim: $(TARGET)
 	spike +signature=$(TARGET).signature.output +signature-granularity=8 $(TARGET)
 	diff --ignore-case $(TARGET).signature.output $(TARGET).reference_output
 	echo "Signature matches! Success!"
 clean:
 	rm -f $(TARGET) $(TARGET).objdump $(TARGET).signature.output
--- a/examples/exercises/3p3/sum.S
+++ b/examples/exercises/3p3/sum.S
@ -0,0 +1,32 @@
 // sum.s
 // David_Harris@hmc.edu 24 December 2021
 // Add up numbers from 1 to N.
 // result in s0, i in s1, N in a0, return answer in a0
 // s0 and s1 are saved on entry and restored before returning,
 // so the caller's values in those registers are preserved.
 //
 // long sum(long N) {
 //   long result, i;
 //   result = 0;
 //   for (i=1; i<=N; i++) result = result + i;
 //   return result;
 // }
 .global sum
 sum:
    addi sp, sp, -16    # make room to save s0 and s1 on the stack
    sd s0, 0(sp)        # save s0 at offset 0 of the frame
    sd s1, 8(sp)        # save s1 at offset 8 of the frame
    li s0, 0            # result = 0
    li s1, 1            # i = 1
 for: bgt s1, a0, done   # exit loop if i > n
    add s0, s0, s1      # result = result + i
    addi s1, s1, 1      # i++
    j for               # repeat
 done:
    mv a0, s0           # put result in a0 to return
    ld s0, 0(sp)        # restore s0 and s1 from stack
    ld s1, 8(sp)
    addi sp, sp, 16     # release the 16-byte frame
    ret                 # return from function
--- a/examples/exercises/3p3/sumtest.S
+++ b/examples/exercises/3p3/sumtest.S
@ -0,0 +1,43 @@
 // sumtest.S
 // David_Harris@hmc.edu 24 December 2021
 // Test harness for sum(): loads N, calls sum(N), stores the result and the
 // instret difference across the call into the signature, then writes 1 to tohost.
 .global rvtest_entry_point
 rvtest_entry_point:
    la sp, topofstack   # Initialize stack pointer
    la t0, N            # get address of N in data
    ld a0, 0(t0)        # load N
    csrr s8, instret    # count instructions before call
    jal sum             # call sum(N)
    csrr s9, instret    # count instructions after call
    sub s9, s9, s8      # length of call
    la t0, begin_signature  # address of signature
    sd a0, 0(t0)        # store sum(N) in signature
    sd s9, 8(t0)        # record performance
 write_tohost:
    la t1, tohost
    li t0, 1            # 1 for success, 3 for failure
    sd t0, 0(t1)        # send success code
 self_loop:
    j self_loop         # wait
 .section .tohost 
 tohost:                 # write to HTIF
    .dword 0
 fromhost:               # second doubleword of the section; never accessed by this program
    .dword 0
 .data
 N:                      # argument passed to sum()
    .dword 6
 .EQU XLEN,64            # used only to size the signature region below
 begin_signature:
    .fill 2*(XLEN/32),4,0xdeadbeef    # 4 words of 4 bytes (room for the two doublewords stored above), preset to 0xdeadbeef
 end_signature:
 # Initialize stack with room for 512 bytes
 .bss
    .space 512
 topofstack:             # label just past the 512 bytes; loaded into sp at entry
--- a/examples/exercises/3p3/sumtest.reference_output
+++ b/examples/exercises/3p3/sumtest.reference_output
@ -0,0 +1,2 @@
 0000000000000015
 0000000000000025
--- a/examples/exercises/3p5/Makefile
+++ b/examples/exercises/3p5/Makefile
@ -0,0 +1,32 @@
 TARGET = matvecmul
 # clean is a command, not a file: run it even if a file named "clean" exists
 .PHONY: clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassembly of every section (-D) intermixed with source (-S)
 $(TARGET).objdump: $(TARGET)
 	riscv64-unknown-elf-objdump -S -D $< > $@
 $(TARGET): $(TARGET).c Makefile
 	riscv64-unknown-elf-gcc -o $@ -gdwarf-2 -O \
 	  -march=rv64gc -mabi=lp64d -mcmodel=medany \
 	  -nostdlib -static -lm -fno-tree-loop-distribute-patterns \
 	  -T../../C/common/test.ld -I../../C/common \
 	  $(TARGET).c ../../C/common/crt.S ../../C/common/syscalls.c
 # Compiler flags:
 #  -o $(TARGET) defines the name of the output file
 #  -gdwarf-2 generates debugging symbols for gdb in DWARF version 2 format
 #  -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization
 #  -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits
 #  -static forces static linking (no dynamic shared libraries on bare metal)
 #  -lm links the math library if necessary (when #include math.h)
 #  -nostdlib avoids inserting standard startup files and default libraries
 #     because we are using crt.S on bare metal
 #  -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library
 #  -T specifies the linker file
 #  -I specifies the include path (e.g. for util.h)
 #  The last line defines the C files to compile.
 #    crt.S is needed as our startup file to initialize the processor
 #    syscalls.c implements printf through the HTIF for Spike
 # other flags from riscv-tests makefiles that don't seem to be important
 # (listed without trailing backslashes: a backslash at the end of a comment line
 #  continues the comment onto the next line, which would swallow the clean: rule)
 #	  -ffast-math -DPREALLOCATE=1 -std=gnu99
 #     -fno-common -fno-builtin-printf -nostartfiles -lgcc
 clean:
 	rm -f $(TARGET) $(TARGET).objdump
--- a/examples/exercises/3p5/matvecmul.c
+++ b/examples/exercises/3p5/matvecmul.c
@ -0,0 +1,22 @@
 #include <stdio.h>  // supports printf
 #include "util.h"   // supports verify
 // Matrix-vector multiplication y = Ax.  
 // A is an m rows x n columns matrix.
 void matvecmul(int A[], int x[], int y[], int m, int n) {
 	// A is stored row-major, so row r starts at A[r*n]; y[r] is the dot product of that row with x.
 	int row, col;
 	for (row = 0; row < m; row++) {
 		const int *rowStart = &A[row * n];
 		int dot = 0;
 		for (col = 0; col < n; col++)
 			dot += rowStart[col] * x[col];
 		y[row] = dot;
 	}
 }
 // Multiply a fixed 2x3 matrix by a fixed 3-vector and check the product.
 // Returns 0 when y == {50, 122} and 1 otherwise. Declared int rather than void
 // so a defined status is returned to whatever calls main, and so the result of
 // matvecmul is actually used.
 // NOTE(review): assumes the startup code in crt.S/syscalls.c treats main's
 // return value as the exit status — confirm.
 int main(void) {
  int A[6] = {1, 2, 3, 4, 5, 6};   // rows {1,2,3} and {4,5,6}
  int x[3] = {7, 8, 9};
  int y[2];
  matvecmul(A, x, y, 2, 3);
  // 1*7 + 2*8 + 3*9 = 50 and 4*7 + 5*8 + 6*9 = 122
  return (y[0] == 50 && y[1] == 122) ? 0 : 1;
 }
--- a/examples/exercises/3p7/Makefile
+++ b/examples/exercises/3p7/Makefile
@ -0,0 +1,32 @@
 TARGET = fir
 # clean is a command, not a file: run it even if a file named "clean" exists
 .PHONY: clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassembly of every section (-D) intermixed with source (-S)
 $(TARGET).objdump: $(TARGET)
 	riscv64-unknown-elf-objdump -S -D $< > $@
 $(TARGET): $(TARGET).c Makefile
 	riscv64-unknown-elf-gcc -o $@ -gdwarf-2 -O2 \
 	  -march=rv64gc -mabi=lp64d -mcmodel=medany \
 	  -nostdlib -static -lm -fno-tree-loop-distribute-patterns \
 	  -T../../C/common/test.ld -I../../C/common \
 	  $(TARGET).c ../../C/common/crt.S ../../C/common/syscalls.c
 # Compiler flags:
 #  -o $(TARGET) defines the name of the output file
 #  -gdwarf-2 generates debugging symbols for gdb in DWARF version 2 format
 #  -O turns on basic optimization; -O2 (used here) and -O3 optimize more heavily; omit for no optimization
 #  -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits
 #  -static forces static linking (no dynamic shared libraries on bare metal)
 #  -lm links the math library if necessary (when #include math.h)
 #  -nostdlib avoids inserting standard startup files and default libraries
 #     because we are using crt.S on bare metal
 #  -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library
 #  -T specifies the linker file
 #  -I specifies the include path (e.g. for util.h)
 #  The last line defines the C files to compile.
 #    crt.S is needed as our startup file to initialize the processor
 #    syscalls.c implements printf through the HTIF for Spike
 # other flags from riscv-tests makefiles that don't seem to be important
 # (listed without trailing backslashes: a backslash at the end of a comment line
 #  continues the comment onto the next line, which would swallow the clean: rule)
 #	  -ffast-math -DPREALLOCATE=1 -std=gnu99
 #     -fno-common -fno-builtin-printf -nostartfiles -lgcc
 clean:
 	rm -f $(TARGET) $(TARGET).objdump
--- a/examples/exercises/3p7/fir.c
+++ b/examples/exercises/3p7/fir.c
@ -0,0 +1,83 @@
 #include <stdio.h>  // supports printf
 #include "util.h"   // supports verify
 // Sum of two Q1.31 fixed-point values: a plain integer addition of the raw bits
 int add_q31(int a, int b) {
 	int sum = a + b;
 	return sum;
 }
 // Multiplly two Q1.31 fixed point numbers
 int mul_q31(int a, int b) {
 	long res = (long)a * (long)b;
 	int result = res >> 31; // shift right to get the 32-bit result; this is equivalent to shifting left by 1 and discarding the bottom 32 bits
 	//printf("mul_q31: a = %x, b = %x, res = %lx, result = %x\n", a, b, res, result);
 	return result; 
 }
 // FIR filter: apply coefficients c to samples x, writing the result to y
 // n is the length of x, m is the length of c; n-m+1 outputs are produced
 // y[j] = c[0]*x[j+m-1] + c[1]*x[j+m-2] + ... + c[m-1]*x[j]
 // inputs in Q1.31 format
 void fir(int x[], int c[], int y[], int n, int m) {
 	int outputs = n - m + 1;	// positions where all m taps overlap x
 	int out, tap;
 	for (out = 0; out < outputs; out++) {
 		int *acc = &y[out];	// accumulate directly in the output element
 		*acc = 0;
 		for (tap = 0; tap < m; tap++) {
 			int sample = x[out - tap + (m-1)];
 			*acc = add_q31(*acc, mul_q31(c[tap], sample));
 		}
 	}
 }
 // Filter two periods of a 10-sample sine wave with a 4-tap filter, print the
 // outputs, and compare them with the expected values.
 // Returns the result of verify(); 0 means success.
 int main(void) {
 	int32_t sin_table[20] = { // in Q1.31 format
 		0x00000000, // sin(0*2pi/10)
 		0x4B3C8C12, // sin(1*2pi/10)
 		0x79BC384D, // sin(2*2pi/10)
 		0x79BC384D, // sin(3*2pi/10)
 		0x4B3C8C12, // sin(4*2pi/10)
 		0x00000000, // sin(5*2pi/10)
 		0xB4C373EE, // sin(6*2pi/10)
 		0x8643C7B3, // sin(7*2pi/10)
 		0x8643C7B3, // sin(8*2pi/10)
 		0xB4C373EE, // sin(9*2pi/10)
 		0x00000000, // sin(10*2pi/10)
 		0x4B3C8C12, // sin(11*2pi/10)
 		0x79BC384D, // sin(12*2pi/10)
 		0x79BC384D, // sin(13*2pi/10)
 		0x4B3C8C12, // sin(14*2pi/10)
 		0x00000000, // sin(15*2pi/10)
 		0xB4C373EE, // sin(16*2pi/10)
 		0x8643C7B3, // sin(17*2pi/10)
 		0x8643C7B3, // sin(18*2pi/10)
 		0xB4C373EE  // sin(19*2pi/10)
 	};  
 	int lowpass[4] = {0x20000001, 0x20000002, 0x20000003, 0x20000004}; // 1/4 in Q1.31 format
 	int y[17];			// 20 - 4 + 1 outputs
 	int expected[17] = { // in Q1.31 format
 		0x4fad3f2f,
 		0x627c6236,
 		0x4fad3f32,
 		0x1e6f0e17,
 		0xe190f1eb,
 		0xb052c0ce,
 		0x9d839dc6,
 		0xb052c0cb,
 		0xe190f1e6,
 		0x1e6f0e12,
 		0x4fad3f2f,
 		0x627c6236,
 		0x4fad3f32,
 		0x1e6f0e17,
 		0xe190f1eb,
 		0xb052c0ce,
 		0x9d839dc6
 	};
 	int numOutputs = sizeof(y) / sizeof(y[0]);	// 17; one count for both the print loop and verify
 	setStats(1);		// record initial mcycle and minstret
 	fir(sin_table, lowpass, y, 20, 4);
 	setStats(0);		// record elapsed mcycle and minstret
 	for (int i=0; i<numOutputs; i++) {
 		printf("y[%d] = %x\n", i, y[i]);
 	}
 	// check that every element of y matches expected (the original passed 16,
 	// leaving the last output unchecked). 0 means success
 	return verify(numOutputs, y, expected);
 }
--- a/examples/exercises/3p9/Makefile
+++ b/examples/exercises/3p9/Makefile
@ -0,0 +1,32 @@
 TARGET = inline
 # clean is a command, not a file: run it even if a file named "clean" exists
 .PHONY: clean
 # Remove a half-written target (e.g. a truncated objdump) when its recipe fails
 .DELETE_ON_ERROR:
 # Default goal: disassembly of every section (-D) intermixed with source (-S)
 $(TARGET).objdump: $(TARGET)
 	riscv64-unknown-elf-objdump -S -D $< > $@
 $(TARGET): $(TARGET).c Makefile
 	riscv64-unknown-elf-gcc -o $@ -gdwarf-2 -O \
 	  -march=rv64gc -mabi=lp64d -mcmodel=medany \
 	  -nostdlib -static -lm -fno-tree-loop-distribute-patterns \
 	  -T../../C/common/test.ld -I../../C/common \
 	  $(TARGET).c ../../C/common/crt.S ../../C/common/syscalls.c
 # Compiler flags:
 #  -o $(TARGET) defines the name of the output file
 #  -gdwarf-2 generates debugging symbols for gdb in DWARF version 2 format
 #  -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization
 #  -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits
 #  -static forces static linking (no dynamic shared libraries on bare metal)
 #  -lm links the math library if necessary (when #include math.h)
 #  -nostdlib avoids inserting standard startup files and default libraries
 #     because we are using crt.S on bare metal
 #  -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library
 #  -T specifies the linker file
 #  -I specifies the include path (e.g. for util.h)
 #  The last line defines the C files to compile.
 #    crt.S is needed as our startup file to initialize the processor
 #    syscalls.c implements printf through the HTIF for Spike
 # other flags from riscv-tests makefiles that don't seem to be important
 # (listed without trailing backslashes: a backslash at the end of a comment line
 #  continues the comment onto the next line, which would swallow the clean: rule)
 #	  -ffast-math -DPREALLOCATE=1 -std=gnu99
 #     -fno-common -fno-builtin-printf -nostartfiles -lgcc
 clean:
 	rm -f $(TARGET) $(TARGET).objdump
--- a/examples/exercises/3p9/inline.c
+++ b/examples/exercises/3p9/inline.c
@ -0,0 +1,11 @@
 #include <stdio.h>  // supports printf
 // Demonstrates GCC extended inline assembly: computes c = a + 2*b with two
 // RISC-V instructions and prints the result.
 int main(void) {
 	int a = 3;
 	int b = 4;
 	int c;
 	// compute c = a + 2*b using inline assembly
 	// Each asm statement gives the instruction template, then the output operands
 	// ("=r": a register that is written), then the input operands ("r": a register
 	// that is read). %0, %1, %2 refer to the operands in the order they are listed.
 	asm volatile("slli %0, %1, 1" : "=r" (c) : "r" (b));	      // c = b << 1
 	asm volatile("add %0, %1, %2" : "=r" (c) : "r" (a), "r" (c)); // c = a + c
 	printf("c = %d\n", c);	// with a = 3 and b = 4 this prints c = 11
 }
--- a/examples/exercises/fma16/Makefile
+++ b/examples/exercises/fma16/Makefile
@ -0,0 +1,16 @@
 # Builds one executable per .c file in this directory, linked against Berkeley SoftFloat.
 # WALLY must point at the root of the repository that contains addins/berkeley-softfloat-3.
 CC     = gcc
 CFLAGS = -O3 -Wno-format-overflow
 IFLAGS = -I$(WALLY)/addins/berkeley-softfloat-3/source/include/
 LIBS   = $(WALLY)/addins/berkeley-softfloat-3/build/Linux-x86_64-GCC/softfloat.a -lm -lquadmath
 # Optional extra link flags; empty unless supplied by the user
 LFLAGS ?=
 # := so the directory is globbed once, at parse time
 SRCS   := $(wildcard *.c)
 PROGS  := $(patsubst %.c,%,$(SRCS))
 # all and clean are commands, not files: always run them when asked
 .PHONY: all clean
 all:	$(PROGS)
 # Static pattern rule: applies only to the programs listed in PROGS, instead of
 # a match-anything "%: %.c" rule that make would consider for every target
 $(PROGS): %: %.c
 	$(CC) $(CFLAGS) -DSOFTFLOAT_FAST_INT64 $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
 clean:
 	rm -f $(PROGS)
--- a/examples/exercises/fma16/fma16_testgen.c
+++ b/examples/exercises/fma16/fma16_testgen.c
@ -0,0 +1,147 @@
 // fma16_testgen.c
 // David_Harris 8 February 2025
 // Generate tests for 16-bit FMA
 // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include "softfloat.h"
 #include "softfloat_types.h"
 // Overlays a SoftFloat float32_t on a native float, so a value written through
 // one member can be read back through the other (used by convFloat for printing).
 typedef union sp {
  float32_t v;  // SoftFloat single-precision value
  float f;      // the same storage viewed as a native float
 } sp;
 // lists of tests, terminated with 0x8000
 // prepTests combines every exponent with every fraction as (exponent << 10) | fraction
 uint16_t easyExponents[] = {15, 0x8000}; // 15 << 10 = 0x3C00, the encoding genCase uses for 1.0
 uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1 (binary significands; 0x200 is the top fraction bit)
 // Put SoftFloat's global state into the configuration used for test generation
 void softfloatInit(void) {
    softfloat_exceptionFlags = 0;                                   // start with no exceptions raised
    softfloat_detectTininess = softfloat_tininess_beforeRounding;   // tininess is detected before rounding
    softfloat_roundingMode   = softfloat_round_minMag;              // round toward the smaller magnitude
 }
 // Widen a half-precision value to a native float so it can be printed with %f
 float convFloat(float16_t f16) {
    sp bits;
    bits.v = f16_to_f32(f16);   // half -> SoftFloat single
    return bits.f;              // read the same storage back as a native float
 }
 // Write one test vector for x*y+z to fptr.
 //   mul, add       0 forces y to 1.0 / z to 0 so the multiply / add has no effect
 //   negp, negz     1 flips the sign of x (negating the product) / of z
 //   roundingMode   code stored in the op field; the caller must already have set
 //                  softfloat_roundingMode to the matching SoftFloat mode
 //   zeroAllowed, infAllowed, nanAllowed
 //                  when 0, cases with such a result get a "// skip ..." prefix
 // Line format: x_y_z_op_result_flags // human-readable calculation and flags
 void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    float16_t result, resultmag, smallest;
    int op, flagVals;
    char calc[80], flags[80];
    float xf, yf, zf, rf;
    if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
    if (!add) z.v = 0x0000; // force z to 0 to avoid add
    if (negp) x.v ^= 0x8000; // flip sign of x to negate p
    if (negz) z.v ^= 0x8000; // flip sign of z to negate z
    op = (roundingMode << 4) | (mul << 3) | (add << 2) | (negp << 1) | negz;
 //    printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
    softfloat_exceptionFlags = 0; // clear exceptions
    result = f16_mulAdd(x, y, z); // call SoftFloat to compute expected result
    // Extract expected flags from SoftFloat
    snprintf(flags, sizeof(flags), "NV: %d OF: %d UF: %d NX: %d", 
        (softfloat_exceptionFlags >> 4) % 2,
        (softfloat_exceptionFlags >> 2) % 2,
        (softfloat_exceptionFlags >> 1) % 2,
        (softfloat_exceptionFlags) % 2);
    // pack these four flags into one nibble, discarding DZ flag
    flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);
    // convert to floats for printing
    xf = convFloat(x);
    yf = convFloat(y);
    zf = convFloat(z);
    rf = convFloat(result);
    if (mul) {
        if (add) snprintf(calc, sizeof(calc), "%f * %f + %f = %f", xf, yf, zf, rf);
        else     snprintf(calc, sizeof(calc), "%f * %f = %f", xf, yf, rf);
    } else       snprintf(calc, sizeof(calc), "%f + %f = %f", xf, zf, rf);
    // omit denorms, which aren't required for this project
    smallest.v = 0x0400;
    resultmag = result;
    resultmag.v &= 0x7FFF; // take absolute value
    if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
    // Test only the UF bit. The original "flags >> 1 % 2" parsed as "flags >> (1 % 2)",
    // which is non-zero for any flag other than NX, so overflow and invalid cases
    // were also tagged as underflow.
    if ((softfloat_exceptionFlags >> 1) % 2) fprintf(fptr, "// skip underflow: ");
    // skip special cases if requested
    if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
    if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed)  fprintf(fptr, "// Skip inf: ");
    if (resultmag.v >  0x7C00 && !nanAllowed)  fprintf(fptr, "// Skip NaN: ");
    // print the test case
    fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
 }
 // Write the description line to fptr, then fill cases[] with one half-precision
 // value for every (exponent, fraction) pair from the 0x8000-terminated lists e
 // and f. The number of values written is returned through numCases.
 // testName is accepted but not used here.
 void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases, 
               FILE *fptr, int *numCases) {
    uint16_t *expPtr, *fracPtr;
    int count = 0;
    fprintf(fptr, "%s\n", desc);
    // Walk both lists by pointer until the 0x8000 terminator
    for (expPtr = e; *expPtr != 0x8000; expPtr++) {
        for (fracPtr = f; *fracPtr != 0x8000; fracPtr++) {
            cases[count].v = *fracPtr | (*expPtr << 10);   // exponent goes above the 10 fraction bits
            count++;
        }
    }
    *numCases = count;
 }
 // Generate multiply-only test vectors into work/<testName>.tv.
 // Every value built from the exponent list e and fraction list f is multiplied
 // by every other such value; when sgn is 1 each pair is also emitted with the
 // sign of y flipped. desc is written as the first line of the file.
 // roundingMode and the *Allowed flags are passed through to genCase.
 // Exits with status 1 if the file name does not fit or the file cannot be opened.
 void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    int i, j, k, numCases;
    float16_t x, y, z;
    float16_t cases[100000];
    FILE *fptr;
    char fn[80];
    // Bounded formatting: an over-long testName is reported instead of overflowing fn
    if (snprintf(fn, sizeof(fn), "work/%s.tv", testName) >= (int)sizeof(fn)) {
        printf("Error: test name %s is too long\n", testName);
        exit(1);
    }
    if ((fptr = fopen(fn, "w")) == 0) {
        printf("Error opening to write file %s.  Does directory exist?\n", fn);
        exit(1);
    }
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
    z.v = 0x0000;   // the addend is unused: genCase is called with add = 0
    for (i=0; i < numCases; i++) { 
        x.v = cases[i].v;
        for (j=0; j<numCases; j++) {
            y.v = cases[j].v;
            for (k=0; k<=sgn; k++) {
                y.v ^= (k<<15);   // k = 0 leaves y unchanged; k = 1 flips its sign bit
                genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
            }
        }
    }
    fclose(fptr);
 }
 // Entry point: make sure the output directory exists, configure SoftFloat,
 // then generate each test-vector file.
 int main()
 {
    int mkdirStatus = system("mkdir -p work"); // create work directory if it doesn't exist
    if (mkdirStatus != 0) {
        exit(1);
    }
    softfloatInit(); // configure softfloat modes
    // Test cases: multiplication
    genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
 /*  // example of how to generate tests with a different rounding mode
    softfloat_roundingMode = softfloat_round_near_even; 
    genMulTests(easyExponents, easyFracts, 0, "fmul_0_rne", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RNE", 1, 0, 0, 0); */
    // Add your cases here
    return 0;
 }