Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main

2025-02-11 06:05:49 +00:00 · 2022-06-13 23:34:35 +00:00 · 2022-06-13 23:34:35 +00:00 · 7c0f4dd954
commit 7c0f4dd954
parent a229e0ee87 5f7072bd96
56 changed files with 94818 additions and 3406 deletions
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
--- a/examples/verilog/fma/Makefile
+++ b/examples/verilog/fma/Makefile
@ -0,0 +1,23 @@
+# Makefile
+# Builds one executable per .c file in this directory (e.g. the fma16 test
+# vector generator), linked against SoftFloat.
+# Requires the RISCV variable to point at the tree that holds riscv-isa-sim.
+
+CC     = gcc
+CFLAGS = -O3
+LFLAGS = -L.
+# Link against the riscv-isa-sim version of SoftFloat rather than
+# the regular version to get RISC-V NaN behavior
+IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
+# LIBS is defined exactly once: previously an earlier "LIBS = -lm" was silently
+# overwritten by the SoftFloat assignment.  The archive comes first so that
+# -lm can satisfy any math symbols it references.
+LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm
+#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
+#LIBS   = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a -lm
+SRCS   := $(wildcard *.c)
+
+PROGS  := $(patsubst %.c,%,$(SRCS))
+
+# all and clean are commands, not files; without this a stray file named
+# "all" or "clean" would stop them from running.
+.PHONY: all clean
+
+all:	$(PROGS)
+
+# Compile and link each program directly from its single source file
+%: %.c
+	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
+
+clean:
+	rm -f $(PROGS)
--- a/examples/verilog/fma/baby_torture.tv
+++ b/examples/verilog/fma/baby_torture.tv
--- a/examples/verilog/fma/baby_torture_rz.tv
+++ b/examples/verilog/fma/baby_torture_rz.tv
--- a/examples/verilog/fma/fma.do
+++ b/examples/verilog/fma/fma.do
@ -0,0 +1,23 @@
+# fma.do
+#
+# Compile and simulate the fma16 testbench.
+#   interactive:  vsim -do "do fma.do"
+#   batch:        vsim -c -do "do fma.do"
+
+onbreak {resume}
+
+# create library
+vlib worklib
+
+# compile the design and testbench, optimize, and load the simulation
+vlog -lint -sv -work worklib fma16.v testbench.v
+vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
+vsim -lib worklib testbenchopt
+
+# add the testbench-level signals to the wave window, in display order
+foreach sig {clk reset x y z result rexpected} {
+    add wave sim:/testbench_fma16/$sig
+}
+
+run -all
--- a/examples/verilog/fma/fma16.v
+++ b/examples/verilog/fma/fma16.v
@ -0,0 +1,268 @@
+// fma16.sv
+// David_Harris@hmc.edu 26 February 2022
+// 16-bit floating-point multiply-accumulate
+
+// Operation: general purpose multiply, add, fma, with optional negation
+//   If mul=1, p = x * y.  Else p = x.
+//   If add=1, result = p + z.  Else result = p.
+//   If negr or negz = 1, negate result or z to handle negations and subtractions
+//   fadd: mul = 0, add = 1, negr = negz = 0
+//   fsub: mul = 0, add = 1, negr = 0, negz = 1
+//   fmul: mul = 1, add = 0, negr = 0, negz = 0
+//   fmadd:  mul = 1, add = 1, negr = 0, negz = 0
+//   fmsub:  mul = 1, add = 1, negr = 0, negz = 1
+//   fnmadd: mul = 1, add = 1, negr = 1, negz = 0
+//   fnmsub: mul = 1, add = 1, negr = 1, negz = 1
+
+// Half-precision format parameters
+`define FFLEN 16                   // total width in bits
+`define Nf 10                      // fraction bits
+`define Ne 5                       // exponent bits
+`define BIAS 15                    // exponent bias
+`define EMIN (-(2**(`Ne-1)-1))     // evaluates to -15 for Ne = 5
+`define EMAX (2**(`Ne-1)-1)        // evaluates to  15 for Ne = 5
+
+`define NaN 16'h7E00               // full 16-bit NaN pattern returned by postproc16
+`define INF 15'h7C00               // 15 bits: exponent and fraction only; a sign bit is prepended at use
+
+// Rounding modes.  Literals are 2 bits wide to match the 2-bit roundmode port
+// (they were previously written as 3'b with only two digits).
+`define RZ  2'b00
+`define RNE 2'b01
+`define RM  2'b10
+`define RP  2'b11
+
+// fma16: top level of the 16-bit floating-point multiply-accumulate unit.
+// Unpacks x, y, z, forms the product (mult16), adds the addend (add16),
+// and packs the result with special-case handling (postproc16).
+module fma16(
+  input  logic [`FFLEN-1:0] x, y, z,
+  input  logic        mul, add, negr, negz,
+  input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rm, 11: rp
+  output logic [`FFLEN-1:0] result);
+ 
+  logic [`Nf:0] xm, ym, zm; // U1.Nf
+  logic [`Ne-1:0]  xe, ye, ze; // B_Ne
+  logic        xs, ys, zs;
+  logic        zs1; // sign before optional negation
+  logic [2*`Nf+1:0] pm; // U2.2Nf
+  logic [`Ne:0]  pe; // B_Ne+1
+  logic        ps;  // sign of product
+  logic [22:0] rm;
+  logic [`Ne+1:0]  re;
+  logic        rs;
+  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
+  logic [`Ne+1:0]  re2;
+
+  unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);  // unpack inputs
+
+  // zs was previously left undriven: unpack16 writes zs1, and signadj16 (which
+  // would drive zs) is not instantiated because it would also drive ps, which
+  // mult16 already drives.  add16 applies negz itself when computing the
+  // effective subtraction, so it needs the raw sign of z here.
+  // NOTE(review): postproc16 therefore sees the un-negated sign of z in its
+  // special cases - confirm once negz is exercised by the tests.
+  // NOTE(review): negr is not used anywhere in this module.
+  assign zs = zs1;
+
+  mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps);                       // p = x * y
+  add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs);             // r = z + p
+  postproc16 post(roundmode,  xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result);                 // normalize, round, pack
+endmodule
+
+// mult16: form the product p = x * y, or pass x through when mul = 0.
+//   pm: product significand (U2.2Nf); x's significand aligned into the same format when mul = 0
+//   pe: product exponent, still biased, one bit wider than the operand exponents
+//   ps: sign of p
+module mult16(
+  input  logic        mul,
+  input  logic [`Nf:0] xm, ym,
+  input  logic [`Ne-1:0]  xe, ye,
+  input  logic        xs, ys,
+  output logic [2*`Nf+1:0] pm,
+  output logic [`Ne:0]  pe,
+  output logic        ps);
+
+  // only multiply if mul = 1
+  assign pm = mul ? xm * ym : {1'b0, xm, 10'b0};       // multiply mantissas
+  assign pe = mul ? xe + ye - `BIAS : {1'b0, xe};      // add exponents, account for bias
+  // Sign follows the same pass-through rule as pm and pe: with mul = 0 the
+  // product is just x, so y's sign must not affect it.
+  assign ps = mul ? xs ^ ys : xs;                      // negative if exactly one of X, Y is negative
+endmodule
+
+// add16: align the addend z against the product p, add or subtract the
+// significands, and compute the normalization shift and exponent.
+//   pm/pe/ps : product significand, exponent, sign
+//   zm/ze/zs : addend significand, exponent, sign
+//   negz     : extra negation of z, folded into the effective-subtract decision
+//   re, re2  : exponent after alignment / after normalization
+//   rs       : result sign
+// NOTE(review): the `add` input is unused and `rm` is never driven while the
+// pass-through logic at the bottom of the module is commented out.
+module add16(
+  input  logic        add,
+  input  logic [2*`Nf+1:0] pm,  // U2.2Nf
+  input  logic [`Nf:0] zm, // U1.Nf
+  input  logic [`Ne:0]  pe, // B_Ne+1
+  input  logic [`Ne-1:0]  ze, // B_Ne
+  input  logic        ps, zs, 
+  input  logic        negz,
+  output logic [22:0] rm,
+  output logic [`Ne+1:0]  re, // B_Ne+2
+  output logic [`Ne+1:0]  re2,
+  output logic        rs);
+
+  logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
+  logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
+  logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.  
+  logic [`Nf-1:0] prezsticky;
+  logic           zsticky;
+  logic          effectivesub;
+  logic           rs0;
+  logic [`Ne:0]     leadingzeros, NormCnt; // *** should parameterize size
+  logic [`Ne:0]   re1;
+
+  // Alignment shift
+  assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
+  assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
+  always_comb // AlignCount mux; see Muller page 254
+    if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7;         re = {1'b0, pe}; end
+    else if (ExpDiff <= 2)       begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
+    else if (ExpDiff <= `Nf+3)   begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
+    else                         begin AlignCnt = 0;                 re = {2'b0, ze}; end
+  // Shift Zm right by AlignCnt.  Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
+  assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
+  assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
+  
+  // Effective subtraction
+  assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
+  assign zalignedaddsub = effectivesub ? ~zaligned : zaligned;  // invert zaligned for subtraction
+
+  // Adder
+  assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
+  assign rs0 = r[`Nf*3+7]; // sign of the initial result
+  assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
+
+  // Sign Logic
+  assign rs = ps ^ rs0; // flip the sign if necessary
+
+  // Leading zero counter
+  lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
+  assign re1 = pe +2 - leadingzeros; // *** check # of bits
+
+  // Normalization shift
+  // The last branch now assigns re2.  It previously assigned re, which gave re
+  // a second driver (it is already set by the alignment always_comb above) and
+  // left re2 without a value on that path.
+  // NOTE(review): re1 is declared unsigned while `EMIN is negative - confirm
+  // that the comparison and the re2 = `EMIN assignment behave as intended.
+  always_comb // NormCount mux
+    if (ExpDiff < 3) begin 
+      if (re1 >= `EMIN) begin  NormCnt = `Nf + 3 + leadingzeros;  re2 = {1'b0, re1}; end
+      else              begin  NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN;    end
+    end else            begin  NormCnt = AlignCnt; re2 = {2'b00, ze};                 end
+  assign rnormed = r2 << NormCnt; // *** update sticky
+  /* temporarily comment out to start synth
+
+  // One-bit secondary normalization
+  if (ExpDiff <= 2)          begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
+  else begin // *** handle sticky
+    if (rnormed[***])        begin rnormed2 = rnormed >> 1; re2 = re+1; end
+    else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re;        end
+    else                     begin rnormed2 = rnormed << 1; re2 = re-1; end
+  end
+
+  // round
+  assign l = rnormed2[***]; // least significant bit 
+  assign r = rnormed2[***-1]; // rounding bit
+  assign s = ***; // sticky bit
+  always_comb
+    case (roundmode)
+      RZ: roundup = 0;
+      RP: roundup = ~rs & (r | s); 
+      RM: roundup = rs & (r | s);
+      RNE: roundup = r & (s | l);
+      default: roundup = 0;
+    endcase
+  assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
+*/
+
+  // *** need to handle rounding to MAXNUM vs. INFINITY
+  
+  // add or pass product through
+ /* assign rm = add ? arm : {1'b0, pm};
+  assign re = add ? are : {1'b0, pe};
+  assign rs = add ? ars : ps; */
+endmodule
+
+// lzc: leading-zero counter used by add16 to size the normalization shift.
+// NOTE(review): this is an empty stub - leadingzeros is never driven, so every
+// value derived from it in add16 is undefined until a body is supplied.
+module lzc(
+  input  logic [`Nf*3+7:0] r2,
+  output logic [`Ne:0]   leadingzeros
+);
+
+endmodule
+
+
+// postproc16: select the final 16-bit result.
+// Contains two result paths: a special-case priority chain (NaN, infinity,
+// zero operands, large exponent) and a one-bit normalize / re-bias path.
+// NOTE(review): `result` is assigned both in the first always_comb block and
+// by the continuous assign near the end of the module; only one of the two
+// drivers can remain.  Which path is intended cannot be told from this file.
+// NOTE(review): roundmode is not used, and `invalid` is computed but is not a port.
+module postproc16(
+  input  logic [1:0] roundmode,
+  input  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
+  input  logic [22:0] rm, 
+  input  logic [`Nf:0] zm, // U1.Nf
+  input  logic [6:0]  re, 
+  input  logic [`Ne-1:0]  ze, // B_Ne
+  input  logic        rs, zs, ps,
+  input  logic [`Ne+1:0]  re2,
+  output logic [15:0] result);
+
+  logic [9:0] uf, uff;      // fraction before / after the overflow check
+  logic [6:0] ue;           // exponent after the optional one-bit normalization
+  logic [6:0] ueb, uebiased; // ue minus 15; ueb is assigned but not read in this module
+  logic       invalid;
+
+    // Special cases
+  // *** not handling signaling NaN
+  // *** also add overflow/underflow/inexact
+  // Priority chain: the first matching condition wins
+  always_comb begin
+    if (xnan | ynan | znan)                    begin result = `NaN; invalid = 0; end // propagate NANs
+    else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
+    else if (xzero & yinf | xinf & yzero)      begin result = `NaN; invalid = 1; end // zero times infinity
+    else if (xinf | yinf)                      begin result = {ps, `INF}; invalid = 0; end // X or Y
+    else if (zinf)                             begin result = {zs, `INF}; invalid = 0; end // infinite Z
+    else if (xzero | yzero)                    begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end // product is zero: return z
+    else if (re2 >= `EMAX)                     begin result = {rs, `INF}; invalid = 0; end // exponent too large: signed infinity
+    else                                       begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end // pack sign, exponent, fraction
+  end
+  
+  // One-bit normalization keyed on rm[21]
+  always_comb 
+    if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
+        ue = re + 7'b1;
+        uf = rm[20:11];
+    end else begin // no normalization shift needed
+        ue = re;
+        uf = rm[19:10];
+    end
+
+  // overflow
+  // NOTE(review): with the overflow assignments commented out, uebiased and
+  // uff receive no value when ue >= 46.
+  always_comb begin
+    ueb = ue-7'd15;
+    if (ue >= 7'd46) begin // overflow
+/*      uebiased = 7'd30;
+      uff = 10'h3ff; */
+    end else begin
+      uebiased = ue-7'd15;
+      uff = uf;
+    end
+  end
+  
+  // Second driver of result (see the NOTE(review) at the top of the module)
+  assign result = {rs, uebiased[4:0], uff};
+
+  // add special case handling for zeros, NaN, Infinity
+endmodule
+
+// signadj16: derive the product sign and the (optionally negated) addend sign.
+// negr is accepted but does not affect either output.
+module signadj16(
+  input  logic negr, negz,
+  input  logic xs, ys, zs1,
+  output logic ps, zs);
+
+  always_comb begin
+    ps = ys ^ xs;     // product is negative when exactly one factor is negative
+    zs = negz ^ zs1;  // flip the addend sign when negz is set
+  end
+endmodule
+
+// unpack16: split each of the three 16-bit operands into significand,
+// exponent, sign, and zero/inf/nan flags using one unpacknum16 per operand.
+module unpack16(
+  input  logic [15:0] x, y, z,
+  output logic [10:0] xm, ym, zm,
+  output logic [4:0]  xe, ye, ze,
+  output logic        xs, ys, zs,
+  output logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
+
+  unpacknum16 upx(.num(x), .m(xm), .e(xe), .s(xs), .zero(xzero), .inf(xinf), .nan(xnan));
+  unpacknum16 upy(.num(y), .m(ym), .e(ye), .s(ys), .zero(yzero), .inf(yinf), .nan(ynan));
+  unpacknum16 upz(.num(z), .m(zm), .e(ze), .s(zs), .zero(zzero), .inf(zinf), .nan(znan));
+endmodule
+
+// unpacknum16: break one 16-bit number into its fields.
+//   m    : 11-bit significand, always with a leading 1 prepended
+//   e    : exponent exactly as stored (bias left in)
+//   s    : sign bit
+//   zero : exponent and fraction both 0
+//   inf  : exponent 31, fraction 0
+//   nan  : exponent 31, fraction nonzero
+module unpacknum16(
+  input logic  [15:0] num,
+  output logic [10:0] m,
+  output logic [4:0]  e,
+  output logic        s, 
+  output logic        zero, inf, nan);
+
+  logic [9:0] frac;     // fraction without leading 1
+  logic [4:0] bexp;     // biased exponent
+  logic       fraczero; // fraction field is all zeros
+
+  // slice the bit fields out of the packed number
+  assign s    = num[15];
+  assign bexp = num[14:10];
+  assign frac = num[9:0];
+
+  assign m = {1'b1, frac}; // prepend leading 1 to fraction
+  assign e = bexp;         // leave bias in exponent ***
+
+  assign fraczero = (frac == 0);
+  assign zero = (bexp == 0)  & fraczero;
+  assign inf  = (bexp == 31) & fraczero;
+  assign nan  = (bexp == 31) & ~fraczero;
+endmodule
+
+
--- a/examples/verilog/fma/fma16_template.v
+++ b/examples/verilog/fma/fma16_template.v
@ -0,0 +1,24 @@
+// fma16.sv
+// David_Harris@hmc.edu 26 February 2022
+// 16-bit floating-point multiply-accumulate
+
+// Operation: general purpose multiply, add, fma, with optional negation
+//   If mul=1, p = x * y.  Else p = x.
+//   If add=1, result = p + z.  Else result = p.
+//   If negr or negz = 1, negate result or z to handle negations and subtractions
+//   fadd: mul = 0, add = 1, negr = negz = 0
+//   fsub: mul = 0, add = 1, negr = 0, negz = 1
+//   fmul: mul = 1, add = 0, negr = 0, negz = 0
+//   fmadd:  mul = 1, add = 1, negr = 0, negz = 0
+//   fmsub:  mul = 1, add = 1, negr = 0, negz = 1
+//   fnmadd: mul = 1, add = 1, negr = 1, negz = 0
+//   fnmsub: mul = 1, add = 1, negr = 1, negz = 1
+
+// Empty starting point for the fma16 design: the port list only, with no logic.
+module fma16(
+  input  logic [15:0] x, y, z,
+  input  logic        mul, add, negr, negz,
+  input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rm, 11: rp
+  output logic [15:0] result);
+ 
+endmodule
+
--- a/examples/verilog/fma/fma16_testgen.c
+++ b/examples/verilog/fma/fma16_testgen.c
@ -0,0 +1,240 @@
+#include <stdio.h>
+#include <stdint.h>
+#include "softfloat.h"
+#include "softfloat_types.h"
+
+// View the same 32 bits either as a SoftFloat float32_t or as a native float
+typedef union sp {
+  float32_t v;
+  float f;
+} sp;
+
+// lists of tests, terminated with 0x8000
+// The exponent and fraction lists below are walked by prepTests until the
+// 0x8000 sentinel is reached.
+uint16_t easyExponents[] = {15, 0x8000};
+uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
+uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
+uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
+uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000}; 
+// NOTE(review): the next three arrays are not referenced in this file.  infs
+// and nans carry no 0x8000 sentinel, so they presumably hold literal
+// half-precision bit patterns (0x8000 in zeros would then be -0) rather than
+// terminated lists - confirm before passing them to prepTests.
+uint16_t zeros[] = {0x0000, 0x8000};
+uint16_t infs[] = {0x7C00, 0xFC00};
+uint16_t nans[] = {0x7D00, 0x7D01};
+
+// Put the SoftFloat globals into their starting state: no exception flags,
+// tininess detected before rounding, and round-toward-zero (minMag).
+void softfloatInit(void) {
+    softfloat_exceptionFlags = 0;
+    softfloat_detectTininess = softfloat_tininess_beforeRounding;
+    softfloat_roundingMode   = softfloat_round_minMag;
+}
+
+// Widen a half-precision value to single precision with SoftFloat and return
+// it as a native float (used only for human-readable printing).
+float convFloat(float16_t f16) {
+    sp bits;
+
+    bits.v = f16_to_f32(f16);  // store through the float32_t member ...
+    return bits.f;             // ... and read the same bits back as a float
+}
+
+// Compute one test vector with SoftFloat and append it to fptr.
+//   x, y, z      : operands; y is forced to 1.0 when !mul and z to 0 when !add
+//   mul, add     : which operations are enabled
+//   negp, negz   : flip the sign of x / z before the computation
+//   roundingMode : code written into the op field (the SoftFloat rounding mode
+//                  itself is whatever the caller has set globally)
+//   zeroAllowed, infAllowed, nanAllowed : when 0, vectors whose result falls in
+//                  that class are written with a leading "//" so they are skipped
+// Line format: x_y_z_op_result_flags // readable summary and flag breakdown.
+// Vectors with a denormalized result are always commented out.
+void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    float16_t result, resultmag, smallest;
+    int op, flagVals;
+    char calc[80], flags[80];
+    float xf, yf, zf, rf;
+
+    if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
+    if (!add) z.v = 0x0000; // force z to 0 to avoid add
+    if (negp) x.v ^= 0x8000; // flip sign of x to negate p
+    if (negz) z.v ^= 0x8000; // flip sign of z to negate z
+    op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
+//    printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
+    softfloat_exceptionFlags = 0; // clear exceptions
+    result = f16_mulAdd(x, y, z);
+
+    // snprintf keeps the text inside the fixed-size buffers
+    snprintf(flags, sizeof(flags), "NV: %d OF: %d UF: %d NX: %d", 
+        (softfloat_exceptionFlags >> 4) % 2,
+        (softfloat_exceptionFlags >> 2) % 2,
+        (softfloat_exceptionFlags >> 1) % 2,
+        (softfloat_exceptionFlags) % 2);
+    // pack these four flags into one nibble, discarding DZ flag
+    flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);
+
+    // convert to floats for printing
+    xf = convFloat(x);
+    yf = convFloat(y);
+    zf = convFloat(z);
+    rf = convFloat(result);
+    if (mul) {
+        if (add) snprintf(calc, sizeof(calc), "%f * %f + %f = %f", xf, yf, zf, rf);
+        else     snprintf(calc, sizeof(calc), "%f * %f = %f", xf, yf, rf);
+    } else {
+        snprintf(calc, sizeof(calc), "%f + %f = %f", xf, zf, rf);
+    }
+
+    // omit denorms, which aren't required for this project
+    smallest.v = 0x0400;
+    resultmag = result;
+    resultmag.v &= 0x7FFF; // take absolute value
+    if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
+    if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
+    if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed)  fprintf(fptr, "// Skip inf: ");
+    if (resultmag.v >  0x7C00 && !nanAllowed)  fprintf(fptr, "// Skip NaN: ");
+    fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
+}
+
+// Write the description line to fptr and build the list of operand values.
+//   e, f     : exponent and fraction lists, each terminated by 0x8000
+//   testName : unused here; kept so existing callers need no change
+//   desc     : text written as the first line of the file
+//   cases    : output array; receives one value per (exponent, fraction) pair,
+//              formed as fraction | exponent << 10.  The caller must size it.
+//   numCases : output; number of entries written to cases
+void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases, 
+               FILE *fptr, int *numCases) {
+    int i, j, n = 0;
+
+    (void)testName;
+    // Emit desc as data: passing it as the format string would misinterpret
+    // any '%' it contains.
+    fprintf(fptr, "%s\n", desc);
+    for (i=0; e[i] != 0x8000; i++)
+        for (j=0; f[j] != 0x8000; j++)
+            cases[n++].v = f[j] | e[i]<<10;
+    *numCases = n;
+}
+
+// Generate work/<testName>.tv with multiply-only vectors (z fixed at 0).
+// Every pair of operand values built from e and f is used for x and y; when
+// sgn is 1 each pair is emitted a second time with the sign of y flipped.
+// The remaining arguments are passed straight through to genCase.
+// If the output file cannot be opened, the error is reported and nothing is generated.
+void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // e.g. the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    z.v = 0x0000;
+    for (i=0; i < numCases; i++) { 
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            y.v = cases[j].v;
+            for (k=0; k<=sgn; k++) {
+                y.v ^= (k<<15); // no change on the first pass, sign flip on the second
+                genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+// Generate work/<testName>.tv with add-only vectors.
+// Every pair of operand values built from e and f is used for x and z; when
+// sgn is 1 each pair is emitted a second time with the sign of z flipped.
+// y is passed as 0 here; genCase replaces it with 1.0 because mul is 0.
+// If the output file cannot be opened, the error is reported and nothing is generated.
+void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // e.g. the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    y.v = 0x0000;
+    for (i=0; i < numCases; i++) {
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            z.v = cases[j].v;
+            for (k=0; k<=sgn; k++) {
+                z.v ^= (k<<15); // no change on the first pass, sign flip on the second
+                genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+
+// Generate work/<testName>.tv with fused multiply-add vectors.
+// Every triple of operand values built from e and f is used for x, y, z; when
+// sgn is 1 each triple is emitted a second time with the sign of z flipped.
+// If the output file cannot be opened, the error is reported and nothing is generated.
+void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, l, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // e.g. the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    for (i=0; i < numCases; i++) {
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            y.v = cases[j].v;
+            for (k=0; k<numCases; k++) {
+                z.v = cases[k].v;
+                for (l=0; l<=sgn; l++) {
+                    z.v ^= (l<<15); // no change on the first pass, sign flip on the second
+                    genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+                }
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+// Generate work/<testName>.tv with fused multiply-add vectors that include
+// the special values.  The operand list built from e and f is extended with
+// +0 and -0, and every triple is emitted once per pass of the three nested
+// sign loops.
+// The sign flips are applied in place and not undone between passes, so the
+// order of sign combinations differs from one triple to the next.
+// If the output file cannot be opened, the error is reported and nothing is generated.
+void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
+    int i, j, k, sx, sy, sz, numCases;
+    float16_t x, y, z;
+    float16_t cases[100000];
+    FILE *fptr;
+    char fn[80];
+ 
+    snprintf(fn, sizeof(fn), "work/%s.tv", testName);
+    fptr = fopen(fn, "w");
+    if (fptr == NULL) { // e.g. the work directory does not exist
+        perror(fn);
+        return;
+    }
+    prepTests(e, f, testName, desc, cases, fptr, &numCases);
+    cases[numCases].v = 0x0000; // add +0 case
+    cases[numCases+1].v = 0x8000; // add -0 case
+    numCases += 2; 
+    for (i=0; i < numCases; i++) {
+        x.v = cases[i].v;
+        for (j=0; j<numCases; j++) {
+            y.v = cases[j].v;
+            for (k=0; k<numCases; k++) {
+                z.v = cases[k].v;
+                for (sx=0; sx<=sgn; sx++) {
+                    x.v ^= (sx<<15);
+                    for (sy=0; sy<=sgn; sy++) {
+                        y.v ^= (sy<<15);
+                        for (sz=0; sz<=sgn; sz++) {
+                            z.v ^= (sz<<15);
+                            genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    fclose(fptr);
+}
+
+
+// Generate every test vector file: multiply, add, and FMA suites in
+// round-toward-zero, then the special-case suite once per rounding mode.
+int main()
+{
+    // Special-case suite for the remaining rounding modes:
+    // SoftFloat mode to select, output name, description, op-field code
+    static const struct {
+        int   sfMode;
+        char *name;
+        char *desc;
+        int   rmCode;
+    } sweeps[] = {
+        {softfloat_round_near_even, "fma_special_rne", "// FMA with special cases, RNE", 1},
+        {softfloat_round_min,       "fma_special_rm",  "// FMA with special cases, RM",  2},
+        {softfloat_round_max,       "fma_special_rp",  "// FMA with special cases, RP",  3},
+    };
+    int s;
+
+    softfloatInit(); // configure softfloat modes
+
+    // Multiplication
+    genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
+    genMulTests(medExponents,  medFracts,  0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
+    genMulTests(medExponents,  medFracts,  1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
+
+    // Addition
+    genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
+    genAddTests(medExponents,  medFracts,  0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
+    genAddTests(medExponents,  medFracts,  1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
+
+    // Fused multiply-add
+    genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
+    genFMATests(medExponents,  medFracts,  0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
+    genFMATests(medExponents,  medFracts,  1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
+
+    // Zero, Infinity, NaN in the rounding mode set by softfloatInit
+    genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
+
+    // Same suite again under each of the other rounding modes
+    for (s = 0; s < (int)(sizeof(sweeps)/sizeof(sweeps[0])); s++) {
+        softfloat_roundingMode = sweeps[s].sfMode;
+        genSpecialTests(allExponents, medFracts, 1, sweeps[s].name, sweeps[s].desc, sweeps[s].rmCode, 1, 1, 1);
+    }
+
+    return 0;
+}
--- a/examples/verilog/fma/lint-fma
+++ b/examples/verilog/fma/lint-fma
@ -0,0 +1,8 @@
+#!/bin/bash
+# check for warnings in Verilog code
+# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
+export PATH=$PATH:/usr/local/bin/
+
+# Stop with a clear message if verilator is not installed, instead of trying
+# to execute an empty command name.
+verilator=$(command -v verilator) || { echo "lint-fma: verilator not found in PATH" >&2; exit 1; }
+
+# Run from the directory holding this script so that fma16.v is found no
+# matter where the script is invoked from.
+cd "$(dirname "$0")" || exit 1
+"$verilator" --lint-only --top-module fma16 fma16.v
--- a/examples/verilog/fma/sim-fma
+++ b/examples/verilog/fma/sim-fma
@ -0,0 +1,2 @@
+vsim -do "do fma.do"
+
--- a/examples/verilog/fma/sim-fma-batch
+++ b/examples/verilog/fma/sim-fma-batch
@ -0,0 +1 @@
+vsim -c -do "do fma.do"
--- a/examples/verilog/fma/synth
+++ b/examples/verilog/fma/synth
@ -0,0 +1 @@
+make -C ../../../synthDC synth DESIGN=fma16
--- a/examples/verilog/fma/testbench.v
+++ b/examples/verilog/fma/testbench.v
@ -0,0 +1,52 @@
+/* verilator lint_off STMTDLY */
+// testbench_fma16: self-checking testbench for fma16.
+// Loads test vectors from a file, applies one vector shortly after each rising
+// clock edge, and compares the DUT result with the expected value on the
+// falling edge.  Stops at the first vector that is entirely x.
+module testbench_fma16;
+  reg        clk, reset;
+  reg [15:0] x, y, z, rexpected;
+  wire [15:0] result;
+  reg [7:0]  ctrl;                // op field of the vector; only ctrl[5:0] is decoded
+  reg [3:0]  flagsexpected;       // read from the vector but not checked yet
+  reg        mul, add, negp, negz;
+  reg [1:0]  roundmode;
+  reg [31:0] vectornum, errors;
+  reg [75:0] testvectors[10000:0]; // 76 bits = x, y, z (16 each) + ctrl (8) + rexpected (16) + flags (4)
+
+  // instantiate device under test
+  // (ports are connected by position; negp drives the DUT's fourth control input)
+  fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
+
+  // generate clock
+  always 
+    begin
+      clk = 1; #5; clk = 0; #5;
+    end
+
+  // at start of test, load vectors and pulse reset
+  initial
+    begin
+      $readmemh("work/fmul_0.tv", testvectors);
+      vectornum = 0; errors = 0;
+      reset = 1; #22; reset = 0;
+    end
+
+  // apply test vectors on rising edge of clk
+  // (the #1 delay applies the new inputs one time unit after the edge)
+  always @(posedge clk)
+    begin
+      #1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
+      {roundmode, mul, add, negp, negz} = ctrl[5:0];
+    end
+
+  // check results on falling edge of clk
+  always @(negedge clk)
+    if (~reset) begin // skip during reset
+      if (result !== rexpected) begin  // check result     // *** should also add tests on flags eventually
+        $display("Error: inputs %h * %h + %h", x, y, z);
+        $display("  result = %h (%h expected)", result, rexpected);
+        errors = errors + 1;
+      end
+      vectornum = vectornum + 1;
+      // an all-x entry means no more vectors were loaded: report and stop
+      if (testvectors[vectornum] === 'x) begin 
+        $display("%d tests completed with %d errors", 
+	           vectornum, errors);
+        $stop;
+      end
+    end
+endmodule
--- a/examples/verilog/fma/torture.tv
+++ b/examples/verilog/fma/torture.tv
--- a/examples/verilog/fma/torturegen.pl
+++ b/examples/verilog/fma/torturegen.pl
@ -0,0 +1,130 @@
+#!/usr/bin/perl -w
+# torturegen.pl 
+# David_Harris@hmc.edu 19 April 2022
+# Convert TestFloat cases into format for fma16 project torture test
+# Strip out cases involving denorms
+
+use strict;
+
+# Input files: one per (operation, rounding mode) pair, grouped by operation
+my @names = ();
+for my $opbase ("add", "mul", "mulAdd") {
+    push(@names, "f16_${opbase}_${_}.tv") for ("rz", "rd", "ru", "rne");
+}
+
+open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
+my $datestring = localtime();
+print(TORTURE "// Torture tests generated $datestring by $0\n");
+foreach my $tv (@names) {
+    open(TV, "work/$tv") || die("Can't read $tv");
+    my $type = &getType($tv); # is it mul, add, mulAdd
+    my $rm = &getRm($tv); # rounding mode
+    my $isMulAdd = ($type eq "mulAdd");
+
+    # the same banner goes to the output file and to stdout
+    my $banner = "\n////////// Testcases from $tv of type $type rounding mode $rm\n";
+    print(TORTURE $banner);
+    print($banner);
+
+    # keep only one line in every $density; these values do not change per line
+    my $babyTorture = 0;
+    my $density = $isMulAdd ? 500 : 10;
+    if ($babyTorture) { $density = $isMulAdd ? 50000 : 100; }
+
+    my $linecount = 0;
+    while (my $line = <TV>) {
+        $linecount++;
+        next unless (($linecount + $rm) % $density) == 0; # too many tests to use
+        chomp($line); # strip off newline
+        my @parts = split(/_/, $line);
+
+        # map the input fields onto x, y, z; the operand a test type does not
+        # use is filled with a fixed value
+        my $x = $parts[0];
+        my $y = ($type eq "add") ? "0000" : $parts[1];
+        my $z = ($type eq "mul") ? "3CFF"
+              : ($type eq "add") ? $parts[1]
+              :                    $parts[2];
+
+        # op field: rounding mode in the upper nibble, then the mul and add bits
+        my $op = $rm << 4;
+        $op += 8 if ($type eq "mul" || $isMulAdd);
+        $op += 4 if ($type eq "add" || $isMulAdd);
+        my $opname = sprintf("%02x", $op);
+
+        # expected result and flags sit one field later for mulAdd
+        my ($w, $flags) = $isMulAdd ? @parts[3, 4] : @parts[2, 3];
+        $flags = substr($flags, -1); # take last character
+        $w = "7e00" if (&fpval($w) eq "NaN"); # every NaN result is written as 7e00
+
+        my $summary = &summary($x, $y, $z, $w, $type);
+        if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
+            print TORTURE "// Skipped denorm $tv line $linecount $line $summary\n";
+        } else {
+            print TORTURE "${x}_${y}_${z}_${opname}_${w}_${flags} // $tv line $linecount $line $summary\n";
+        }
+    }
+    close(TV);
+}
+close(TORTURE);
+
+# fpval: describe a 16-bit floating-point number, given as a hex string, in
+# readable form for the summary comment on each vector.
+# Returns "NaN", "INF", 0, "Denorm", or "1.<10 fraction bits> x 2^<exponent>";
+# every form except "NaN" gets a leading "-" when the sign bit is set.
+sub fpval {
+    my $val = shift;
+    $val = hex($val); # convert hex string to number
+    my $frac = $val & 0x3FF;
+    my $exp = ($val >> 10) & 0x1F;
+    my $sign = $val >> 15;
+
+    my $res;
+    if ($exp == 31 && $frac != 0) { return "NaN"; }
+    elsif ($exp == 31) { $res = "INF"; }
+    # zero is judged on exponent and fraction only, so that -0 (0x8000) is
+    # reported as "-0" rather than "-Denorm"
+    elsif ($exp == 0 && $frac == 0) { $res = 0; }
+    elsif ($exp == 0) { $res = "Denorm"; }
+    # the fraction field is 10 bits wide; printing 11 digits added a spurious
+    # leading zero after the binary point
+    else { $res = sprintf("1.%010b x 2^%d", $frac, $exp-15); }
+
+    if ($sign == 1) { $res = "-$res"; }
+    return $res;
+}
+
+# summary: build the readable "x op y = w" text for one vector.
+# Arguments: x, y, z, w as hex strings, then the test type.
+sub summary {
+    my ($x, $y, $z, $w, $type) = @_;
+
+    # convert all four operands, in the same order as before
+    my ($xv, $yv, $zv, $wv) = map { &fpval($_) } ($x, $y, $z, $w);
+
+    return "$xv + $zv = $wv" if ($type eq "add");
+    return "$xv * $yv = $wv" if ($type eq "mul");
+    return "$xv * $yv + $zv = $wv";
+}
+
+# getType: classify a test vector file name as "mulAdd", "mul", or "add".
+# mulAdd is tested first because such a name also contains "mul".
+sub getType {
+    my ($tv) = @_;
+
+    return "mulAdd" if ($tv =~ /mulAdd/);
+    return "mul"    if ($tv =~ /mul/);
+    return "add";
+}
+
+# getRm: map the rounding-mode tag in a file name to its numeric code
+# (rz = 0, rne = 1, rd = 2, ru = 3).  Tags are tried in that order and the
+# first match wins; returns "bad" when no tag is found.
+sub getRm {
+    my ($tv) = @_;
+
+    foreach my $entry (["rz", 0], ["rne", 1], ["rd", 2], ["ru", 3]) {
+        my ($tag, $code) = @$entry;
+        return $code if ($tv =~ /$tag/);
+    }
+    return "bad";
+}
+
+# isdenorm: return 1 if the 16-bit floating-point number given as a hex string
+# is denormalized (exponent field 0 with a nonzero fraction), else 0.
+# The test uses the fraction field rather than the whole value, so negative
+# zero (0x8000) is no longer misclassified as a denorm and its vectors are kept.
+sub isdenorm {
+    my $fp = shift;
+    my $val = hex($fp);
+    my $expv = ($val >> 10) & 0x1F;
+    my $frac = $val & 0x3FF;
+    my $denorm = 0;
+    if ($expv == 0 && $frac != 0) { $denorm = 1;}
+    return $denorm;
+}
--- a/examples/verilog/fma/wave.do
+++ b/examples/verilog/fma/wave.do
@ -0,0 +1,62 @@
+# wave.do: wave window setup for the fma16 testbench - signal list, cursors,
+# column layout, and zoom range.
+# NOTE(review): many of the dut signals listed below (XManE, YManE, ProdManE,
+# KillProd, ...) are not declared by the fma16 module in fma16.v; this list
+# looks like it was saved against a different implementation.  The onerror
+# setting lets the script continue past entries that fail - confirm.
+onerror {resume}
+quietly WaveActivateNextPane {} 0
+add wave -noupdate /testbench_fma16/clk
+add wave -noupdate /testbench_fma16/reset
+add wave -noupdate /testbench_fma16/x
+add wave -noupdate /testbench_fma16/y
+add wave -noupdate /testbench_fma16/z
+add wave -noupdate /testbench_fma16/result
+add wave -noupdate /testbench_fma16/rexpected
+add wave -noupdate /testbench_fma16/dut/x
+add wave -noupdate /testbench_fma16/dut/y
+add wave -noupdate /testbench_fma16/dut/z
+add wave -noupdate /testbench_fma16/dut/mul
+add wave -noupdate /testbench_fma16/dut/add
+add wave -noupdate /testbench_fma16/dut/negr
+add wave -noupdate /testbench_fma16/dut/negz
+add wave -noupdate /testbench_fma16/dut/roundmode
+add wave -noupdate /testbench_fma16/dut/result
+add wave -noupdate /testbench_fma16/dut/XManE
+add wave -noupdate /testbench_fma16/dut/YManE
+add wave -noupdate /testbench_fma16/dut/ZManE
+add wave -noupdate /testbench_fma16/dut/XExpE
+add wave -noupdate /testbench_fma16/dut/YExpE
+add wave -noupdate /testbench_fma16/dut/ZExpE
+add wave -noupdate /testbench_fma16/dut/PExpE
+add wave -noupdate /testbench_fma16/dut/Ne
+add wave -noupdate /testbench_fma16/dut/upOneExt
+add wave -noupdate /testbench_fma16/dut/XSgnE
+add wave -noupdate /testbench_fma16/dut/YSgnE
+add wave -noupdate /testbench_fma16/dut/ZSgnE
+add wave -noupdate /testbench_fma16/dut/PSgnE
+add wave -noupdate /testbench_fma16/dut/ProdManE
+add wave -noupdate /testbench_fma16/dut/NfracS
+add wave -noupdate /testbench_fma16/dut/ProdManAl
+add wave -noupdate /testbench_fma16/dut/ZManExt
+add wave -noupdate /testbench_fma16/dut/ZManAl
+add wave -noupdate /testbench_fma16/dut/Nfrac
+add wave -noupdate /testbench_fma16/dut/res
+add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
+add wave -noupdate /testbench_fma16/dut/NSamt
+add wave -noupdate /testbench_fma16/dut/ZExpGreater
+add wave -noupdate /testbench_fma16/dut/ACLess
+add wave -noupdate /testbench_fma16/dut/upOne
+add wave -noupdate /testbench_fma16/dut/KillProd
+TreeUpdate [SetDefaultTree]
+WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
+quietly wave cursor active 2
+configure wave -namecolwidth 237
+configure wave -valuecolwidth 64
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+configure wave -gridoffset 0
+configure wave -gridperiod 1
+configure wave -griddelta 40
+configure wave -timeline 0
+configure wave -timelineunits ns
+update
+WaveRestoreZoom {4083 ns} {4235 ns}
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -55,20 +55,22 @@
 `define Q_NE 32'd15
 `define Q_NF 32'd112
 `define Q_BIAS 32'd16383
+`define Q_FMT 2'd3
 `define D_LEN 32'd64
 `define D_NE 32'd11
 `define D_NF 32'd52
 `define D_BIAS 32'd1023
-`define D_FMT 32'd1
+`define D_FMT 2'd1
 `define S_LEN 32'd32
 `define S_NE 32'd8
 `define S_NF 32'd23
 `define S_BIAS 32'd127
-`define S_FMT 32'd1
+`define S_FMT 2'd0
 `define H_LEN 32'd16
 `define H_NE 32'd5
 `define H_NF 32'd10
 `define H_BIAS 32'd15
+`define H_FMT 2'd2

 // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
 `define FLEN (`Q_SUPPORTED ? `Q_LEN  : `D_SUPPORTED ? `D_LEN  : `F_SUPPORTED ? `S_LEN  : `H_LEN)
@ -91,6 +93,12 @@
 `define FMT2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0    : 2'd2)
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)

+// largest length in IEU/FPU
+`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
+`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
+`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
+`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
+
 // Disable spurious Verilator warnings

 /* verilator lint_off STMTDLY */
--- a/pipelined/regression/sim-testfloat
+++ b/pipelined/regression/sim-testfloat
@ -9,4 +9,4 @@
 # sqrt   - test square ro
 # all    - test everything

-vsim -do "do testfloat.do rv64fpquad cmp"
+vsim -do "do testfloat.do rv64fp mul"
--- a/pipelined/regression/wave-dos/ahb-muldiv.do
+++ b/pipelined/regression/wave-dos/ahb-muldiv.do
@ -67,6 +67,7 @@ add wave -hex /testbench/dut/core/ebu/HTRANS
 add wave -hex /testbench/dut/core/ebu/HRDATA
 add wave -hex /testbench/dut/core/ebu/HWRITE
 add wave -hex /testbench/dut/core/ebu/HWDATA
+add wave -hex /testbench/dut/core/ebu/HBURST
 add wave -hex /testbench/dut/core/ebu/CaptureDataM
 add wave -divider

--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@ -1,102 +1,9 @@

 add wave -noupdate /testbenchfp/clk
 add wave -noupdate -radix decimal /testbenchfp/VectorNum
-add wave -group Other -noupdate /testbenchfp/FrmNum
-add wave -group Other -noupdate /testbenchfp/X
-add wave -group Other -noupdate /testbenchfp/Y
-add wave -group Other -noupdate /testbenchfp/Z
-add wave -group Other -noupdate /testbenchfp/Res
-add wave -group Other -noupdate /testbenchfp/Ans
-
-add wave -group Rne -noupdate /testbenchfp/FmaRneX
-add wave -group Rne -noupdate /testbenchfp/FmaRneY
-add wave -group Rne -noupdate /testbenchfp/FmaRneZ
-add wave -group Rne -noupdate /testbenchfp/FmaRneRes
-add wave -group Rne -noupdate /testbenchfp/FmaRneAns
-add wave -group Rz -noupdate /testbenchfp/FmaRzX
-add wave -group Rz -noupdate /testbenchfp/FmaRzY
-add wave -group Rz -noupdate /testbenchfp/FmaRzZ
-add wave -group Rz -noupdate /testbenchfp/FmaRzRes
-add wave -group Rz -noupdate /testbenchfp/FmaRzAns
-add wave -group Ru -noupdate /testbenchfp/FmaRuX
-add wave -group Ru -noupdate /testbenchfp/FmaRuY
-add wave -group Ru -noupdate /testbenchfp/FmaRuZ
-add wave -group Ru -noupdate /testbenchfp/FmaRuRes
-add wave -group Ru -noupdate /testbenchfp/FmaRuAns
-add wave -group Rd -noupdate /testbenchfp/FmaRdX
-add wave -group Rd -noupdate /testbenchfp/FmaRdY
-add wave -group Rd -noupdate /testbenchfp/FmaRdZ
-add wave -group Rd -noupdate /testbenchfp/FmaRdRes
-add wave -group Rd -noupdate /testbenchfp/FmaRdAns
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmX
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmY
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmRes
-add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns
-add wave -group AllSignals -noupdate /*
-add wave -group AllSignals -noupdate /testbenchfp/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/add/*
-add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/*
-add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/*
+add wave -noupdate /testbenchfp/FrmNum
+add wave -noupdate /testbenchfp/X
+add wave -noupdate /testbenchfp/Y
+add wave -noupdate /testbenchfp/Z
+add wave -noupdate /testbenchfp/Res
+add wave -noupdate /testbenchfp/Ans
--- a/pipelined/regression/wave.do
+++ b/pipelined/regression/wave.do
@ -473,6 +473,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead
 add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr
 add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck
 add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA
+add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete
 add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF
 add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState
 add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0
--- a/pipelined/src/ebu/ahblite.sv
+++ b/pipelined/src/ebu/ahblite.sv
@ -45,6 +45,10 @@ module ahblite (
  input logic 				 IFUBusRead,
  output logic [`XLEN-1:0] 	 IFUBusHRDATA,
  output logic 				 IFUBusAck,
+  output logic         IFUBusInit,
+  input logic [2:0]    IFUBurstType,
+  input logic [1:0]    IFUTransType,
+  input logic          IFUTransComplete,
  // Signals from Data Cache
  input logic [`PA_BITS-1:0] LSUBusAdr,
  input logic 				 LSUBusRead, 
@ -52,7 +56,11 @@ module ahblite (
  input logic [`XLEN-1:0] 	 LSUBusHWDATA,
  output logic [`XLEN-1:0] 	 LSUBusHRDATA,
  input logic [2:0] 		 LSUBusSize,
+  input logic [2:0]      LSUBurstType,
+  input logic [1:0]    LSUTransType,
+  input logic          LSUTransComplete,
  output logic 				 LSUBusAck,
+  output logic         LSUBusInit,
  // AHB-Lite external signals
  (* mark_debug = "true" *) input logic [`AHBW-1:0] HRDATA,
  (* mark_debug = "true" *) input logic HREADY, HRESP,
@ -87,6 +95,9 @@ module ahblite (
  // Data accesses have priority over instructions.  However, if a data access comes
  // while an instruction read is occuring, the instruction read finishes before
  // the data access can take place.
+  //  *** This is no longer true when adding burst mode. We need to finish the current
+  //  read before doing another read. Need to work this out, but preliminarily we can
+  //  store the current read type in a flop and use that to figure out what burst type to use.

  flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState);

@ -100,19 +111,21 @@ module ahblite (
  // interface that might be used in place of the ahblite.
  always_comb 
    case (BusState) 
-      IDLE: if (LSUBusRead)      NextBusState = MEMREAD;  // Memory has priority over instructions
-            else if (LSUBusWrite)NextBusState = MEMWRITE;
-            else if (IFUBusRead)   NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;
-      MEMREAD: if (~HREADY)        NextBusState = MEMREAD;
-            else if (IFUBusRead)   NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;
-      MEMWRITE: if (~HREADY)       NextBusState = MEMWRITE;
-            else if (IFUBusRead)   NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;
-      INSTRREAD: if (~HREADY)      NextBusState = INSTRREAD;
-            else                   NextBusState = IDLE;  // if (IFUBusRead still high) *** need to wait?
-      default:                     NextBusState = IDLE;
+      IDLE: if (LSUBusRead)                               NextBusState = MEMREAD;  // Memory has priority over instructions
+            else if (LSUBusWrite)                         NextBusState = MEMWRITE;
+            else if (IFUBusRead)                          NextBusState = INSTRREAD;
+            else                                          NextBusState = IDLE;
+      MEMREAD: if (LSUTransComplete & IFUBusRead)         NextBusState = INSTRREAD;
+               else if (LSUTransComplete)                 NextBusState = IDLE;
+               else                                       NextBusState = MEMREAD;
+      MEMWRITE: if (LSUTransComplete & IFUBusRead)        NextBusState = INSTRREAD;
+                else if (LSUTransComplete)                NextBusState = IDLE;
+                else                                      NextBusState = MEMWRITE;
+      INSTRREAD: if (IFUTransComplete & LSUBusRead)       NextBusState = MEMREAD;
+                 else if (IFUTransComplete & LSUBusWrite) NextBusState = MEMWRITE;
+                 else if (IFUTransComplete)               NextBusState = IDLE;
+                 else                                     NextBusState = INSTRREAD;
+      default:                                            NextBusState = IDLE;
    endcase


@ -122,7 +135,7 @@ module ahblite (
  assign #1 HADDR = AccessAddress;
  assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
  assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize;
-  assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH
+  assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.

  /* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE.
        000: Single (SINGLE)
@ -133,15 +146,16 @@ module ahblite (
        101: 8-beat incrementing burst (INCR8)
        110: 16-beat wrapping burst (WRAP16) [wraps if X in 0X000000]
        111: 16-beat incrementing burst (INCR16)
-  */
+        *** Remove if not necessary
+  */ 


  assign HPROT = 4'b0011; // not used; see Section 3.7
-  assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
+  assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
  assign HMASTLOCK = 0; // no locking supported
-  assign HWRITE = NextBusState == MEMWRITE;
+  assign HWRITE = (NextBusState == MEMWRITE);
  // delay write data by one cycle for
-  flop #(`XLEN) wdreg(HCLK, LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
+  flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
  // delay signals for subword writes
  flop #(3)   adrreg(HCLK, HADDR[2:0], HADDRD);
  flop #(4)   sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED);
@ -153,7 +167,9 @@ module ahblite (
 
  assign IFUBusHRDATA = HRDATA;
  assign LSUBusHRDATA = HRDATA;
-  assign IFUBusAck = (BusState == INSTRREAD) & (NextBusState != INSTRREAD);
-  assign LSUBusAck = (BusState == MEMREAD) & (NextBusState != MEMREAD) | (BusState == MEMWRITE) & (NextBusState != MEMWRITE);
+  assign IFUBusInit = (BusState != INSTRREAD) & (NextBusState == INSTRREAD);
+  assign LSUBusInit = (((BusState != MEMREAD) & (NextBusState == MEMREAD)) | (BusState != MEMWRITE) & (NextBusState == MEMWRITE));
+  assign IFUBusAck = HREADY & (BusState == INSTRREAD);
+  assign LSUBusAck = HREADY & ((BusState == MEMREAD) | (BusState == MEMWRITE));

 endmodule
--- a/pipelined/src/fpu/cvtshiftcalc.sv
+++ b/pipelined/src/fpu/cvtshiftcalc.sv
@ -0,0 +1,69 @@
+`include "wally-config.vh"
+// cvtshiftcalc: selects the value fed to the conversion shifter (CvtShiftIn) and flags underflow of a ??? -> fp result (CvtResUf)
+module cvtshiftcalc(
+    input logic                    XZeroM,         // suppresses CvtResUf when set (presumably: X is zero - confirm against the unpacker)
+    input logic                    ToInt,          // selects the fp -> int shifter input layout
+    input logic                    IntToFp,        // suppresses CvtResUf when set (int -> fp conversions can't underflow)
+    input logic  [`NE:0]           CvtCalcExpM,    // the calculated exponent (interpreted as signed below)
+    input logic  [`NF:0]           XManM,          // input mantissas
+    input logic  [`FMTBITS-1:0]    OutFmt,         // output format
+    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic                    CvtResDenormUfM, // selects the right-shift layout (result is denormalized or underflowed)
+    output logic                   CvtResUf,       // the ??? -> fp result underflows
+    output logic [`LGLEN+`NF:0]    CvtShiftIn      // number to be shifted
+);
+    logic [$clog2(`NF):0]	ResNegNF;   // the result's fraction length negated (-NF)
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // shifter
+    ///////////////////////////////////////////////////////////////////////////
+
+    // select the input to the shifter
+    //      fp  -> int:
+    //          |  `XLEN  zeros |     Mantissa      | 0's if necessary |
+    //          Other problems:
+    //              - if shifting to the right (neg CalcExp) then we don't want a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
+    //              - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
+    //                  - ex: for the case 0010000.... (double)
+    //      ??? -> fp:
+    //          - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | 
+    //          - otherwise:
+    //              |     LzcInM      | 0's if necessary | 
+    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
+                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : 
+                                   {CvtLzcInM, {`NF+1{1'b0}}};
+    
+    
+    // choose the negative of the fraction size for the selected output format
+    if (`FPSIZES == 1) begin
+        assign ResNegNF = -($clog2(`NF)+1)'(`NF); 
+
+    end else if (`FPSIZES == 2) begin
+        assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT:  ResNegNF = -($clog2(`NF)+1)'(`NF);
+                `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
+                `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
+                default: ResNegNF = 'x; // every bit is a don't-care (a sized 1'bx would be zero-extended, leaving only the LSB as x)
+            endcase
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb
+            case (OutFmt) // all four 2-bit encodings are listed, so no default is needed
+                2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
+                2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
+                2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
+                2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
+            endcase
+    end
+    // determine if the result underflows ??? -> fp
+    //      - if the first 1 is shifted out of the result then the result underflows (signed compare of the exponent against sign-extended -NF)
+    //      - can't underflow an integer to fp conversions
+    assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
+   
+endmodule
--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@ -2,13 +2,12 @@
 `include "wally-config.vh"

 // FOpCtrlE values
-//    111   min
+//    110   min
 //    101   max
 //    010   equal
 //    001   less than
 //    011   less than or equal

-
 module fcmp (   
   input logic  [`FMTBITS-1:0]   FmtE,           // precision 1 = double 0 = single
   input logic  [2:0]            FOpCtrlE,       // see above table
@ -20,12 +19,13 @@ module fcmp (
   input logic                   XSNaNE, YSNaNE, // is signaling NaN
   input logic  [`FLEN-1:0]      FSrcXE, FSrcYE, // original, non-converted to double, inputs
   output logic                  CmpNVE,         // invalid flag
-   output logic [`FLEN-1:0]      CmpResE         // compare resilt
+   output logic [`FLEN-1:0]      CmpFpResE,         // compare resilt
+   output logic [`XLEN-1:0]      CmpIntResE         // compare resilt
   );

   logic LTabs, LT, EQ; // is X < or > or = Y
   logic [`FLEN-1:0] NaNRes;
-   logic BothZeroE, EitherNaNE, EitherSNaNE;
+   logic BothZero, EitherNaN, EitherSNaN;
   
   assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
   assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
@ -36,9 +36,9 @@ module fcmp (
 //   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
   assign EQ = (FSrcXE == FSrcYE);

-   assign BothZeroE = XZeroE&YZeroE;
-   assign EitherNaNE = XNaNE|YNaNE;
-   assign EitherSNaNE = XSNaNE|YSNaNE;
+   assign BothZero = XZeroE&YZeroE;
+   assign EitherNaN = XNaNE|YNaNE;
+   assign EitherSNaN = XSNaNE|YSNaNE;


   // flags
@ -47,11 +47,11 @@ module fcmp (
   //    EQ - quiet - sets invalid if signaling NaN input
   always_comb begin
      case (FOpCtrlE[2:0])
-         3'b111: CmpNVE = EitherSNaNE;//min 
-         3'b101: CmpNVE = EitherSNaNE;//max
-         3'b010: CmpNVE = EitherSNaNE;//equal
-         3'b001: CmpNVE = EitherNaNE;//less than
-         3'b011: CmpNVE = EitherNaNE;//less than or equal
+         3'b110: CmpNVE = EitherSNaN;//min 
+         3'b101: CmpNVE = EitherSNaN;//max
+         3'b010: CmpNVE = EitherSNaN;//equal
+         3'b001: CmpNVE = EitherNaN;//less than
+         3'b011: CmpNVE = EitherNaN;//less than or equal
         default: CmpNVE = 1'b0;
      endcase
   end 
@ -112,16 +112,12 @@ module fcmp (
            endcase

 // when one input is a NaN -output the non-NaN
-   always_comb
-      case (FOpCtrlE[2:0])
-         3'b111: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
-                                 : YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
-         3'b101: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
-                                 : YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
-         3'b010: CmpResE = {(`FLEN-1)'(0), (EQ|BothZeroE) & ~EitherNaNE}; // Equal
-         3'b001: CmpResE = {(`FLEN-1)'(0), LT & ~BothZeroE & ~EitherNaNE}; // Less than
-         3'b011: CmpResE = {(`FLEN-1)'(0), (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
-         default: CmpResE = (`FLEN)'(0);
-      endcase
+   assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
+                                          : YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE : 
+                                    XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
+                                          : YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
+                                    
+
+   assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
   
 endmodule
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@ -10,99 +10,99 @@ module fctrl (
  output logic       IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
  output logic       FRegWriteD,  // FP register write enable
  output logic       FDivStartD,  // Start division or squareroot
-  output logic [1:0] FResultSelD, // select result to be written to fp register
+  output logic [1:0] FResSelD, // select result to be written to fp register
  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
-  output logic [1:0] FResSelD,    // select one of the results done in the memory stage
-  output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
+  output logic [1:0] PostProcSelD, 
  output logic [`FMTBITS-1:0] FmtD,        // precision - single-0 double-1
  output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
  output logic       FWriteIntD   // is the result written to the integer register
  );

-  `define FCTRLW 13
+  `define FCTRLW 11
  logic [`FCTRLW-1:0] ControlsD;
+  //*** will putting x for don't cares reduce area in synthesis???
  // FPU Instruction Decoder
  always_comb
    if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
-      ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
+      ControlsD = `FCTRLW'b0_0_00_00_000_0_1;
    else case(OpD)
-    // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
+    // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
      7'b0000111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
-                    3'b011:  ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
-                    default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // flw
+                    3'b011:  ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // fld
+                    default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
                  endcase
      7'b0100111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
-                    3'b011:  ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
-                    default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsw
+                    3'b011:  ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsd
+                    default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
                  endcase
-      7'b1000011:   ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
-      7'b1000111:   ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
-      7'b1001011:   ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
-      7'b1001111:   ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
+      7'b1000011:   ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
+      7'b1000111:   ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
+      7'b1001011:   ControlsD = `FCTRLW'b1_0_01_10_010_0_0; // fnmsub
+      7'b1001111:   ControlsD = `FCTRLW'b1_0_01_10_011_0_0; // fnmadd
      7'b1010011: casez(Funct7D)
-                    7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
-                    7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
-                    7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
-                    7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
-                    7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
+                    7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0; // fadd
+                    7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0; // fsub
+                    7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0; // fmul
+                    7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_000_1_0; // fdiv
+                    7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_001_1_0; // fsqrt
                    7'b00100??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
-                                  3'b010:  ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_00_00_000_0_0; // fsgnj
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_00_00_001_0_0; // fsgnjn
+                                  3'b010:  ControlsD = `FCTRLW'b1_0_00_00_010_0_0; // fsgnjx
+                                  default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
                                endcase
                    7'b00101??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_00_00_110_0_0; // fmin
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_00_00_101_0_0; // fmax
+                                  default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
                                endcase
                    7'b10100??: case(Funct3D)
-                                  3'b010:  ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
-                                  3'b001:  ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
-                                  3'b000:  ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                                  3'b010:  ControlsD = `FCTRLW'b0_1_00_00_010_0_0; // feq
+                                  3'b001:  ControlsD = `FCTRLW'b0_1_00_00_001_0_0; // flt
+                                  3'b000:  ControlsD = `FCTRLW'b0_1_00_00_011_0_0; // fle
+                                  default: ControlsD = `FCTRLW'b0_0_00_00_000__0_1; // non-implemented instruction
                                endcase
-                    7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
-                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
-                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
-                                else                            ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
-                    7'b1101000: case(Rs2D[1:0])//***reduce resSel
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w   w->s
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l   l->s
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
+                    7'b11100??: if (Funct3D == 3'b001)          ControlsD = `FCTRLW'b0_1_10_00_000_0_0; // fclass
+                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.w   to int reg
+                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.d   to int reg
+                                else                            ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
+                    7'b1101000: case(Rs2D[1:0])
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.s.w   w->s
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.s.wu wu->s
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.s.l   l->s
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.s.lu lu->s
                                endcase
                    7'b1100000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s   s->w
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s  s->wu
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s   s->l
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s  s->lu
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.s   s->w
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.s  s->wu
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.s   s->l
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.s  s->lu
                                endcase
-                    7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
-                    7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
+                    7'b1111000: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.w.x   to fp reg
+                    7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0; // fcvt.s.d
                    7'b1101001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w   w->d
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l   l->d
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.d.w   w->d
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.d.wu wu->d
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.d.l   l->d
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.d.lu lu->d
                                endcase
                    7'b1100001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d   d->w
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d  d->wu
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d   d->l
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d  d->lu
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.d   d->w
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.d  d->wu
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.d   d->l
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.d  d->lu
                                endcase
-                    7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
-                    7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
-                    default:    ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                    7'b1111001: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.d.x   to fp reg
+                    7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0; // fcvt.d.s
+                    default:    ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
                  endcase
-      default:      ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+      default:      ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
    endcase

  // unswizzle control bits
-  assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
+  assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
  
  // rounding modes:
  //    000 - round to nearest, ties to even
@ -121,82 +121,61 @@ module fctrl (
      assign FmtD = 0;
    else if (`FPSIZES == 2)begin
      logic [1:0] FmtTmp;
-      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
      assign FmtD = (`FMT == FmtTmp);
    end
    else if (`FPSIZES == 3|`FPSIZES == 4)
-      assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];

-      // assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
-  // FResultSel:
-  //    000 - ReadRes - load
-  //    001 - FMARes  - FMA and multiply
-  //    010 - FAddRes - add and fp to fp
-  //    011 - FDivRes - divide and squareroot
-  //    100 - FRes    - anything that is written to the fp register and is ready in the memory stage
-  //        FResSel:
-  //            00 - SrcA   - move to fp register 
-  //            01 - SgnRes - sign injection
-  //            10 - CmpRes - min/max
-  //            11 - CvtRes - convert to fp
-  
-  // FIntResSel:
-  //    00 - CmpRes   - less than, equal, or less than or equal 
-  //    01 - FSrcX    - move to int register
-  //    10 - ClassRes - classify
-  //    11 - CvtRes   - convert to signed/unsigned int
+//  Final Res Sel:
+//        fp      int
+//  00  other     cmp
+//  01  postproc  cvt
+//  10  store     class
+//  11            mv

-  // OpCtrl values: 
-  // div/sqrt
-      //  fdiv  = ???0
-      //  fsqrt = ???1
+//  post processing Sel:
+//  00  cvt
+//  01  div
+//  10  fma

-  // cmp		
-      //  fmin = ?111
-      //  fmax = ?101
-      //  feq  = ?010
-      //  flt  = ?001
-      //  fle  = ?011
-      //  {?,  is min or max,   is eq or le,   is lt or le}
+//  Other Sel:
+//    Ctrl signal = {FOpCtrl[2], &FOpCtrl[1:0]}
+//        000 - sign            00
+//        001 - negate sign     00
+//        010 - xor sign        00
+//        011 - mv to fp        01
+//        110 - min             10
+//        101 - max             10

-  //fma/mult	
-      //  fmadd  = ?000
-      //  fmsub  = ?001
-      //  fnmsub = ?010	-(a*b)+c
-      //  fnmadd = ?011 -(a*b)-c
-      //  fmul   = ?100
-      //	{?, is mul, negate product, negate addend}
-
-  // sgn inj
-      //  fsgnj  = ??00
-      //  fsgnjn = ??01
-      //  fsgnjx = ??10
-
-  // add/sub/cnvt
-      //  fadd      = 0000
-      //  fsub      = 0001
-      //  fcvt.s.d  = 0111
-      //  fcvt.d.s  = 0111
-      //  Fmt controls the output for fp -> fp
-      
-  // convert
-      //  fcvt.w.s  = 0010
-      //  fcvt.wu.s = 0110
-      //  fcvt.s.w  = 0001
-      //  fcvt.s.wu = 0101
-      //  fcvt.l.s  = 1010
-      //  fcvt.lu.s = 1110
-      //  fcvt.s.l  = 1001
-      //  fcvt.s.lu = 1101
-      //  fcvt.w.d  = 0010 
-      //  fcvt.wu.d = 0110
-      //  fcvt.d.w  = 0001
-      //  fcvt.d.wu = 0101
-      //  fcvt.l.d  = 1010
-      //  fcvt.lu.d = 1110
-      //  fcvt.d.l  = 1001
-      //  fcvt.d.lu = 1101
-      //  {long, unsigned, to int, from int}
+//  OpCtrl:
+//    Fma: {not multiply-add?, negate prod?, negate Z?}
+//        000 - fmadd
+//        001 - fmsub
+//        010 - fnmsub
+//        011 - fnmadd
+//        100 - mul
+//        110 - add
+//        111 - sub
+//    Div: 
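+//        0 - div  (presumably; the earlier encoding used LSB 0 for fdiv - TODO confirm)
+//        1 - sqrt (presumably; the earlier encoding used LSB 1 for fsqrt - TODO confirm)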
+//    Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
+//    Cvt Fp: output format
+//        10 - to half
+//        00 - to single
+//        01 - to double
+//        11 - to quad
+//    Cmp: bit 2 = min/max; for compares bits [1:0] = {equal?, less than?}
+//        010 - eq
+//        001 - lt
+//        011 - le
+//        110 - min
+//        101 - max
+//    Sgn:
+//        00 - sign
+//        01 - negate sign
+//        10 - xor sign
    

 endmodule
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@ -1,8 +1,5 @@

 `include "wally-config.vh"
-// largest length in IEU/FPU
-`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
-`define LOGLGLEN $unsigned($clog2(`LGLEN+1))

 module fcvt (
    input logic             XSgnE,          // input's sign
@ -13,14 +10,13 @@ module fcvt (
    input logic             FWriteIntE,     // is fp->int (since it's writting to the integer register)
    input logic             XZeroE,         // is the input zero
    input logic             XDenormE,   // is the input denormalized
-    input logic             XInfE,          // is the input infinity
-    input logic             XNaNE,          // is the input a NaN
-    input logic             XSNaNE,         // is the input a signaling NaN
-    input logic [2:0]       FrmE,           // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic [`FMTBITS-1:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
-    output logic [`FLEN-1:0] CvtResE,       // the fp conversion result
-    output logic [`XLEN-1:0] CvtIntResE,    // the int conversion result
-    output logic [4:0]      CvtFlgE         // the conversion's flags
+    output logic [`NE:0]           CvtCalcExpE,    // the calculated expoent
+	output logic [`LOGLGLEN-1:0] CvtShiftAmtE,  // how much to shift by
+    output logic                   CvtResDenormUfE,// does the result underflow or is denormalized
+    output logic                   CvtResSgnE,     // the result's sign
+    output logic                   IntZeroE,      // is the integer zero?
+    output logic [`LGLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
    );

    // OpCtrls:
@ -41,34 +37,8 @@ module fcvt (
    logic [`FMTBITS-1:0]    OutFmt;     // format of the output
    logic [`XLEN-1:0]       PosInt;     // the positive integer input
    logic [`XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
-    logic [`LGLEN-1:0]      LzcIn;      // input to the Leading Zero Counter (priority encoder)
-    logic [`NE:0]           CalcExp;    // the calculated expoent
-	logic [`LOGLGLEN-1:0] ShiftAmt;  // how much to shift by
-    logic [`LGLEN+`NF:0]    ShiftIn;    // number to be shifted
-    logic                   ResDenormUf;// does the result underflow or is denormalized
-    logic                   ResUf;      // does the result underflow
-    logic [`LGLEN+`NF:0]    Shifted;    // the shifted result
    logic [`NE-2:0]         NewBias;    // the bias of the final result
-    logic [$clog2(`NF):0]	ResNegNF;   // the result's fraction length negated (-NF)
    logic [`NE-1:0]	        OldExp;     // the old exponent
-    logic                   ResSgn;     // the result's sign
-    logic                   Sticky;     // sticky bit - for rounding
-    logic                   Round;      // round bit - for rounding
-    logic                   LSBFrac;    // the least significant bit of the fraction - for rounding
-    logic                   CalcPlus1;  // the calculated plus 1
-    logic                   Plus1;      // add one to the final result?
-    logic [`FLEN-1:0]       ShiftedPlus1;   // plus one shifted to the proper position
-    logic [`NE:0]           FullResExp; // the full result exponent (with the overflow bit) 
-    logic [`NE-1:0]         ResExp;     // the result's exponent (trimmed to the correct size)
-    logic [`NF-1:0]         ResFrac;    // the result's fraction
-    logic [`XLEN+1:0]       NegRes;     // the negation of the result
-    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
-    logic                   Overflow, Underflow, Inexact, Invalid; // flags
-    logic                   IntInexact, FpInexact, IntInvalid, FpInvalid;   // flags for FP and int outputs
-    logic [`NE-1:0]         MaxExp;         // the maximum exponent before overflow
-    logic [1:0]             NegResMSBS;     // the negitive integer result's most significant bits
-    logic [`FLEN-1:0]       NaNRes, InfRes, Res, UfRes; //various special results
-    logic                   KillRes;    // kill the result?
    logic                   Signed;     // is the opperation with a signed integer?
    logic                   Int64;      // is the integer 64 bits?
    logic                   IntToFp;       // is the opperation an int->fp conversion?
@ -97,8 +67,9 @@ module fcvt (
    // 1) negate the input if the input is a negitive singed integer
    // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)

-    assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
+    assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE;
    assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
+    assign IntZeroE = ~|TrimInt;

    ///////////////////////////////////////////////////////////////////////////
    // lzc 
@ -107,32 +78,16 @@ module fcvt (
    // choose the input to the leading zero counter i.e. priority encoder
    //             int -> fp : | positive integer | 00000... (if needed) | 
    //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
+    assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
    
-    lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
-
+    lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);

    ///////////////////////////////////////////////////////////////////////////
    // shifter
    ///////////////////////////////////////////////////////////////////////////

-    // seclect the input to the shifter
-    //      fp  -> int:
-    //          |  `XLEN  zeros |     Mantissa      | 0's if nessisary |
-    //          Other problems:
-    //              - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
-    //              - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
-    //                  - ex: for the case 0010000.... (double)
-    //      ??? -> fp:
-    //          - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
-    //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | 
-    //          - otherwise:
-    //              |     lzcIn      | 0's if nessisary | 
-    assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
-                     ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} : 
-                                   {LzcIn, {`NF+1{1'b0}}};
-// kill the shift if it's negitive
+    // kill the shift if it's negative
    // select the amount to shift by
    //      fp -> int: 
    //          - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
@ -144,47 +99,10 @@ module fcvt (
    //              - only shift fp -> fp if the intital value is denormalized
    //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
    //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
-                    ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] : 
+    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
+                    CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : 
                              (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
    
-    // shift
-    //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if nessisary | << CalcExp
-    //          process:
-    //              - start - CalcExp = 1 + XExp - Largest Bias
-    //                  |  `XLEN  zeros     |     Mantissa      | 0's if nessisary |
-    //
-    //              - shift left 1 (1)
-    //                  | `XLEN-1 zeros |bit|     frac      | 0's if nessisary |
-    //                                      . <- binary point
-    //
-    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
-    //                  |  0's |     Mantissa      |      0's if nessisary     |
-    //                  |     keep          |
-    //
-    //      fp -> fp:
-    //          - if result is denormalized or underflowed:
-    //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | << NF+CalcExp-1
-    //          process:
-    //             - start
-    //                 |     mantissa      | 0's |
-    //
-    //             - shift right by NF-1 (NF-1)
-    //                 |  `NF-1  zeros   |     mantissa      | 0's |
-    //
-    //             - shift left by CalcExp = XExp - Largest bias + new bias
-    //                 |   0's  |     mantissa      |     0's      |
-    //                 |       keep      |
-    //
-    //          - if the input is denormalized:
-    //              |     lzcIn      | 0's if nessisary | << ZeroCnt+1
-    //              - plus 1 to shift out the first 1
-    //
-    //      int -> fp: |     lzcIn      | 0's if nessisary | << ZeroCnt+1
-    //              - plus 1 to shift out the first 1
-
-    assign Shifted = ShiftIn << ShiftAmt;
-
    ///////////////////////////////////////////////////////////////////////////
    // exp calculations
    ///////////////////////////////////////////////////////////////////////////
@ -262,40 +180,11 @@ module fcvt (
    //                  - shift left to normilize (-1-ZeroCnt)
    //                  - newBias to make the biased exponent
    //          
-    assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
+    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
    // find if the result is dnormal or underflows
    //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
    //      - can't underflow an integer to Fp conversion
-    assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
-    // choose the negative of the fraction size
-    if (`FPSIZES == 1) begin
-        assign ResNegNF = -($clog2(`NF)+1)'(`NF); 
-
-    end else if (`FPSIZES == 2) begin
-        assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT:  ResNegNF = -($clog2(`NF)+1)'(`NF);
-                `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
-                `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
-                default: ResNegNF = 1'bx;
-            endcase
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (OutFmt)
-                2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
-                2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
-                2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
-                2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
-            endcase
-    end
-    // determine if the result underflows ??? -> fp
-    //      - if the first 1 is shifted out of the result then the result underflows
-    //      - can't underflow an integer to fp conversions
-    assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
+    assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp;

    
    ///////////////////////////////////////////////////////////////////////////
@ -307,498 +196,7 @@ module fcvt (
    //          - if 64-bit : check the msb of the 64-bit integer input and if it's signed
    //          - if 32-bit : check the msb of the 32-bit integer input and if it's signed
    //      - otherwise: the floating point input's sign
-    assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
+    assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;

-    ///////////////////////////////////////////////////////////////////////////
-    // rounding
-    ///////////////////////////////////////////////////////////////////////////
+endmodule

-    // round to nearest even
-    //      {Round, Sticky}
-    //      0x - do nothing
-    //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
-    //      11 - Plus1
-
-    //  round to zero - do nothing
-
-    //  round to -infinity - Plus1 if negative
-
-    //  round to infinity - Plus1 if positive
-
-    //  round to nearest max magnitude
-    //      {Guard, Round, Sticky}
-    //      0x - do nothing
-    //      1x - Plus1
-    // ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
-    if (`FPSIZES == 1) begin
-        assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
-        assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
-        assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
-
-    end else if (`FPSIZES == 2) begin    
-        assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : 
-                        (OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
-        assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : 
-                        OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
-        assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : 
-                        OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
-
-    end else if (`FPSIZES == 3) begin
-        logic ToFpSticky, ToFpRound, ToFpLSBFrac;
-        always_comb
-            case (OutFmt)
-                `FMT:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`NF-`NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
-                end
-                `FMT1:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`NF-`NF1];
-                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
-                end
-                `FMT2:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`NF-`NF2];
-                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
-                end
-                default:  begin 
-                     ToFpSticky = 1'bx;
-                     ToFpRound = 1'bx;
-                     ToFpLSBFrac = 1'bx;
-                end
-            endcase
-            assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
-            assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
-            assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
-
-    end else if (`FPSIZES == 4) begin        
-        logic ToFpSticky, ToFpRound, ToFpLSBFrac;
-        always_comb
-            case (OutFmt)
-                2'h3:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`Q_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
-                end
-                2'h1:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`D_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
-                end
-                2'h0:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`S_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
-                end
-                2'h2:  begin 
-                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
-                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`H_NF];
-                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
-                end
-            endcase
-            assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
-            assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
-            assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
-    end
-
-    always_comb
-        // Determine if you add 1
-        case (FrmE)
-            3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
-            3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = ResSgn;//round down
-            3'b011: CalcPlus1 = ~ResSgn;//round up
-            3'b100: CalcPlus1 = Round;//round to nearest max magnitude
-            default: CalcPlus1 = 1'bx;
-        endcase
-
-    // dont round if exact
-    assign Plus1 = CalcPlus1&(Round|Sticky);
-
-    // shift the 1 to the propper position for rounding
-    //     - dont round it converting to integer
-    if (`FPSIZES == 1) begin
-        assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
-
-    end else if (`FPSIZES == 2) begin
-        assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT:  ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
-                `FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
-                `FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
-                default: ShiftedPlus1 = 0;
-            endcase
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (OutFmt)
-                2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
-                2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
-                2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
-                2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
-            endcase
-    end
-    // kill calcExp if the result is denormalized
-    assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
-    // trim the result's expoent to size
-    assign ResExp = FullResExp[`NE-1:0];
-    ///////////////////////////////////////////////////////////////////////////
-    // flags
-    ///////////////////////////////////////////////////////////////////////////
-    
-    // calculate the flags
-
-    // find the maximum exponent (the exponent and larger overflows)
-    if (`FPSIZES == 1) begin
-        assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
-
-    end else if (`FPSIZES == 2) begin    
-        assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
-                OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
-
-    end else if (`FPSIZES == 3) begin
-        logic [`NE-1:0] MaxExpFp;
-        always_comb
-            case (OutFmt)
-                `FMT:  begin 
-                     MaxExpFp = {`NE{1'b1}};
-                end
-                `FMT1:  begin 
-                     MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
-                end
-                `FMT2:  begin 
-                     MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
-                end
-                default:  begin 
-                     MaxExpFp = 1'bx;
-                end
-            endcase
-            assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
-
-    end else if (`FPSIZES == 4) begin        
-        logic [`NE-1:0] MaxExpFp;
-        always_comb
-            case (OutFmt)
-                2'h3:  begin 
-                     MaxExpFp = {`Q_NE{1'b1}};
-                end
-                2'h1:  begin 
-                     MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
-                end
-                2'h0:  begin 
-                     MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
-                end
-                2'h2:  begin 
-                     MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
-                end
-            endcase
-            assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
-    end
-
-    //                 if the result exponent is larger then the maximum possible exponent
-    //                 |                  and the exponent is positive
-    //                 |                  |             and the input is not NaN or Infinity
-    //                 |                  |             |
-    assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
-
-    //                 if the result is denormalized or underflowed
-    //                 |             and the result did not round into normal values
-    //                 |             |                             and the result is not exact
-    //                 |             |                             |              and the result isn't NaN
-    //                 |             |                             |              |
-    assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
-
-    // we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
-    //                  if there were bits thrown away
-    //                  |            if overflowed or underflowed
-    //                  |            |                    and if not a NaN
-    //                  |            |                    |
-    assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
-
-    //                  if the result is too small to be represented and not 0
-    //                  |                                     and if the result is not invalid (outside the integer bounds)
-    //                  |                                     |
-    assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
-
-    // select the inexact flag to output
-    assign Inexact = ToInt ? IntInexact : FpInexact;
-
-    //                  if an input was a singaling NaN(and we're using a FP input)
-    //                  |
-    assign FpInvalid = (XSNaNE&~IntToFp);
-
-    assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
-			              Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
-    //                  if the input is NaN or infinity
-    //                  |           if the integer result overflows (out of range) 
-    //                  |           |         if the input was negitive but ouputing to a unsigned number
-    //                  |           |         |                    the result doesn't round to zero
-    //                  |           |         |                    |               or the result rounds up out of bounds
-    //                  |           |         |                    |                       and the result didn't underflow
-    //                  |           |         |                    |                       |
-    assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
-    //                                                                                                     |
-    //                                                                                                     or when the positive result rounds up out of range
-    // select the inexact flag to output
-    assign Invalid = ToInt ? IntInvalid : FpInvalid;
-    // pack the flags together
-    //      - fp -> int does not set the overflow or underflow flags
-    assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
-
-
-    ///////////////////////////////////////////////////////////////////////////
-    // result selection
-    ///////////////////////////////////////////////////////////////////////////
-
-    // determine if you shoould kill the result
-    //      - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
-    //      - dont set to zero if fp input is zero but not using the fp input
-    //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
-
-    if (`FPSIZES == 1) begin        
-        // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-        if(`IEEE754) begin
-            assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
-        end else begin 
-            assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
-        end
-        // determine the infinity result
-        //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-        //      - otherwise: output infinity with the correct sign
-        //      - kill the infinity singal if the input isn't fp
-        assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-
-        // result for when the result is killed i.e. underflowes
-        //      - output a rounded 0 with the correct sign
-        assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
-
-        // format the result - NaN box single precision (put 1's in the unused msbs)
-        assign Res   = {ResSgn, ResExp, ResFrac};
-
-
-    end else if (`FPSIZES == 2) begin
-        // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-        if(`IEEE754) begin
-            assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
-        end else begin 
-            assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
-        end
-        // determine the infinity result
-        //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-        //      - otherwise: output infinity with the correct sign
-        //      - kill the infinity singal if the input isn't fp
-        assign InfRes =  OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                                        {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                                 (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
-                                                                                                                                        {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-
-        // result for when the result is killed i.e. underflowes
-        //      - output a rounded 0 with the correct sign
-        assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
-
-        // format the result - NaN box single precision (put 1's in the unused msbs)
-        assign Res   = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
-                    end else begin 
-                        NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {ResSgn, ResExp, ResFrac};
-                end
-                `FMT1: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
-                    end else begin 
-                        NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
-                end
-                `FMT2: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
-                    end else begin 
-                        NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
-                end
-                default: begin
-                    NaNRes = 1'bx;
-                    InfRes = 1'bx;
-                    UfRes  = 1'bx;
-                    Res    = 1'bx;
-                end
-            endcase
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (OutFmt)
-                2'h3: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
-                    end else begin 
-                        NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {ResSgn, ResExp, ResFrac};
-                end
-                2'h1: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
-                    end else begin 
-                        NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
-                end
-                2'h0: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
-                    end else begin 
-                        NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
-                end
-                2'h2: begin
-                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
-                    if(`IEEE754) begin
-                        NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
-                    end else begin 
-                        NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
-                    end
-                    // determine the infinity result
-                    //      - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
-                    //      - otherwise: output infinity with the correct sign
-                    //      - kill the infinity singal if the input isn't fp
-                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
-
-                    // result for when the result is killed i.e. underflowes
-                    //      - output a rounded 0 with the correct sign
-                    UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
-
-                    // format the result - NaN box single precision (put 1's in the unused msbs)
-                    Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
-                end
-            endcase
-    end
-
-    
-    // choose the floating point result
-    //      - if the input is NaN (and using the NaN input) output the NaN result
-    //      - if the input is infinity or the output overflows
-    //      - kill the InfE signal if the input isn't a floating point value
-    //      - if killing the result output the underflow result
-    //      - otherwise output the normal result
-    assign CvtResE = XNaNE&~IntToFp ? NaNRes : 
-                     (XInfE&~IntToFp)|Overflow ? InfRes :
-                     KillRes ? UfRes :
-                     Res;
-    // *** probably can optimize the negation
-    // select the overflow integer result
-    //      - negitive infinity and out of range negitive input
-    //                 |  int  |  long  |
-    //          signed | -2^31 | -2^63  |
-    //        unsigned |   0   |    0   |
-    //
-    //      - positive infinity and out of range negitive input and NaNs
-    //                 |   int  |  long  |
-    //          signed | 2^31-1 | 2^63-1 |
-    //        unsigned | 2^32-1 | 2^64-1 |
-    //
-    //      other: 32 bit unsinged result should be sign extended as if it were a signed number
-    assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
-                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
-                               XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
-                                              {`XLEN{1'b1}};// unsigned positive
-    
-    // round and negate the positive result if needed
-    assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
-    // select the integer output
-    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow result
-    //      - if the input underflows
-    //          - if rounding and signed opperation and negitive input, output -1
-    //          - otherwise output a rounded 0
-    //      - otherwise output the normal result (trmined and sign extended if nessisary)
-    assign CvtIntResE = Invalid ?  OfIntRes :
-			            CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
-                        Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
-
-endmodule
--- a/pipelined/src/fpu/fhazard.sv
+++ b/pipelined/src/fpu/fhazard.sv
@ -34,7 +34,7 @@ module fhazard(
    input logic [4:0]   Adr1E, Adr2E, Adr3E,    // read data adresses
    input logic         FRegWriteM, FRegWriteW, // is the fp register being written to
 	  input logic [4:0]   RdM, RdW,               // the adress being written to
-    input logic [1:0]   FResultSelM,            // the result being selected
+    input logic [1:0]   FResSelM,            // the result being selected
    output logic        FStallD,                // stall the decode stage
    output logic [1:0]  FForwardXE, FForwardYE, FForwardZE // select a forwarded value
 );
@ -47,10 +47,12 @@ module fhazard(
    FForwardZE = 2'b00; // choose FRD3E
    FStallD = 0;

+    //*** this hazard unit waits on all three inputs; change it so that it doesn't wait on an input that isn't used
+
    // if the needed value is in the memory stage - input 1
    if ((Adr1E == RdM) & FRegWriteM) 
      // if the result will be FResM (can be taken from the memory stage)
-      if(FResultSelM == 2'b11) FForwardXE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
      else FStallD = 1;                             // otherwise stall
    // if the needed value is in the writeback stage
    else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
@ -59,7 +61,7 @@ module fhazard(
    // if the needed value is in the memory stage - input 2
    if ((Adr2E == RdM) & FRegWriteM)
      // if the result will be FResM (can be taken from the memory stage)
-      if(FResultSelM == 2'b11) FForwardYE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
      else FStallD = 1;                             // otherwise stall
    // if the needed value is in the writeback stage
    else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
@ -68,7 +70,7 @@ module fhazard(
    // if the needed value is in the memory stage - input 3
    if ((Adr3E == RdM) & FRegWriteM)
      // if the result will be FResM (can be taken from the memory stage)
-      if(FResultSelM == 2'b11) FForwardZE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
      else FStallD = 1;                             // otherwise stall
    // if the needed value is in the writeback stage
    else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@ -0,0 +1,152 @@
+`include "wally-config.vh"
+
+module flags( // computes Invalid/Overflow/Underflow and the combined PostProcFlgM flag vector
+    input logic                 XSgnM,                  // sign of the X input
+    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
+    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic Plus1,                                  // NOTE(review): presumably the round-up increment from the rounder - confirm
+    input logic                 InfIn,                  // is an Inf input being used
+    input logic                 XZeroM, YZeroM,         // inputs are zero
+    input logic                 XNaNM, YNaNM,           // inputs are NaN
+    input logic                 NaNIn,                  // is a NaN input being used
+    input logic                 Sqrt,                   // square root operation?
+    input logic                 ToInt,                  // convert to integer
+    input logic                 IntToFp,                // convert integer to floating point
+    input logic                 Int64,                  // convert to 64 bit integer
+    input logic                 Signed,                 // convert to a signed integer
+    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
+    input logic [`NE:0]         CvtCalcExpM,            // the calculated exponent - Cvt
+    input logic                 CvtOp,                  // conversion operation?
+    input logic                 DivOp,                  // divide/sqrt operation?
+    input logic                 FmaOp,                  // Fma operation?
+    input logic  [`NE+1:0]      FullResExp,             // ResExp with bits to determine sign and overflow
+    input logic  [`NE+1:0]      RoundExp,               // exponent of the normalized sum
+    input logic  [1:0]          NegResMSBS,             // the negative integer result's most significant bits
+    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
+    input logic                 Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
+    output logic                Invalid, Overflow, Underflow, // flags used to select the result
+    output logic [4:0]          PostProcFlgM // flags: {Invalid, DivByZero, Overflow, Underflow, Inexact}
+);
+    logic               SigNaN;     // is an input a signaling NaN
+    logic               Inexact;    // inexact flag
+    logic               FpInexact;  // floating point inexact flag
+    logic               IntInexact; // integer inexact flag
+    logic               IntInvalid; // integer invalid flag
+    logic               FmaInvalid; // fma invalid flag
+    logic               DivInvalid; // divide/sqrt invalid flag
+    logic               DivByZero;  // divide by zero flag
+    logic [`NE-1:0]     MaxExp;     // the maximum exponent before overflow
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Flags
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // MaxExp: Overflow is raised when FullResExp >= MaxExp. Conversions to integer use 65 (Int64) or 33; otherwise all ones in the exponent width selected by OutFmt
+
+   if (`FPSIZES == 1) begin
+        assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
+
+    end else if (`FPSIZES == 2) begin    
+        assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
+                OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
+
+    end else if (`FPSIZES == 3) begin
+        logic [`NE-1:0] MaxExpFp;
+        always_comb
+            case (OutFmt)
+                `FMT:  begin 
+                     MaxExpFp = {`NE{1'b1}};
+                end
+                `FMT1:  begin 
+                     MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
+                end
+                `FMT2:  begin 
+                     MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
+                end
+                default:  begin 
+                     MaxExpFp = 1'bx;
+                end
+            endcase
+            assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
+
+    end else if (`FPSIZES == 4) begin        
+        logic [`NE-1:0] MaxExpFp;
+        always_comb
+            case (OutFmt)
+                2'h3:  begin 
+                     MaxExpFp = {`Q_NE{1'b1}};
+                end
+                2'h1:  begin 
+                     MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
+                end
+                2'h0:  begin 
+                     MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
+                end
+                2'h2:  begin 
+                     MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
+                end
+            endcase
+            assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
+    end
+
+    //                 if the result is greater than or equal to the max exponent
+    //                 |                     and the exponent isn't negative
+    //                 |                     |                   if the input isn't infinity or NaN
+    //                 |                     |                   |            
+    assign Overflow = (FullResExp>={2'b0, MaxExp}) & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
+
+    // detecting tininess after rounding
+    //                  the exponent is negative
+    //                  |                    the result is denormalized
+    //                  |                    |                    the result is normal and rounded from a denorm
+    //                  |                    |                    |                                      and if given an unbounded exponent the result does not round
+    //                  |                    |                    |                                      |                     and if the result is not exact
+    //                  |                    |                    |                                      |                     |               and if the input isn't infinity or NaN
+    //                  |                    |                    |                                      |                     |               |
+    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
+
+    // Set Inexact flag if the result is different from what would be output given infinite precision
+    //      - Don't set the underflow flag if an underflowed result isn't output
+    assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
+
+    //                  if the result is too small to be represented and not 0
+    //                  |                                     and if the result is not invalid (outside the integer bounds)
+    //                  |                                     |
+    assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~Invalid;
+
+    // select the inexact flag to output
+    assign Inexact = ToInt ? IntInexact : FpInexact;
+
+    // Set Invalid flag for following cases:
+    //   1) any input is a signaling NaN
+    //   2) Inf - Inf (unless x or y is NaN)
+    //   3) 0 * Inf; also Inf/Inf, 0/0, sqrt of a negative non-zero number, and out of range conversions to integer
+
+    //                  if the input is NaN or infinity
+    //                  |           if the integer result overflows (out of range) 
+    //                  |           |         if the input was negative but outputting to an unsigned number
+    //                  |           |         |                    the result doesn't round to zero
+    //                  |           |         |                    |               or the result rounds up out of bounds
+    //                  |           |         |                    |                       and the result didn't underflow
+    //                  |           |         |                    |                       |
+    assign IntInvalid = XNaNM|XInfM|Overflow|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
+    //                                                                                                     |
+    //                                                                                                     or when the positive result rounds up out of range
+    assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
+    assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
+    assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&~XNaNM&~XZeroM&Sqrt); // Inf/Inf, 0/0, or sqrt of a negative non-zero number (sqrt(-0) and negative quiet NaNs are not invalid)
+
+    assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp) | (IntInvalid&CvtOp&ToInt);
+
+    // divide by zero is only signaled for a finite non-zero X divided by zero: 0/0 is invalid instead, and it is never signaled for Inf/0, NaN/0 or sqrt
+    assign DivByZero = YZeroM&DivOp&~Sqrt&~(XZeroM|XNaNM|XInfM);
+
+    // Combine flags
+    //      - to integer results do not set the underflow or overflow flags
+    assign PostProcFlgM = {Invalid, DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
+
+endmodule
+
+
+
+
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -30,73 +30,6 @@
 `include "wally-config.vh"

 module fma(
-    input logic                 clk,
-    input logic                 reset,
-    input logic                 FlushM,     // flush the memory stage
-    input logic                 StallM,     // stall memory stage
-    input logic  [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
-    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
-    input logic  [2:0]          FrmM,               // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 XSgnE, YSgnE, ZSgnE,    // input signs - execute stage
-    input logic [`NE-1:0]       XExpE, YExpE, ZExpE,    // input exponents - execute stage
-    input logic [`NF:0]         XManE, YManE, ZManE,    // input mantissa - execute stage
-    input logic                 XSgnM, YSgnM,           // input signs - memory stage
-    input logic [`NE-1:0]       ZExpM,    // input exponents - memory stage
-    input logic [`NF:0]         XManM, YManM, ZManM,    // input mantissa - memory stage
-    input logic                 ZDenormE, // is denorm
-    input logic                 XZeroE, YZeroE, ZZeroE,     // is zero - execute stage
-    input logic                 XNaNM, YNaNM, ZNaNM,        // is NaN
-    input logic                 XSNaNM, YSNaNM, ZSNaNM,     // is signaling NaN
-    input logic                 XZeroM, YZeroM, ZZeroM,     // is zero - memory stage
-    input logic                 XInfM, YInfM, ZInfM,        // is infinity
-	output logic [`FLEN-1:0]    FMAResM,    // FMA result
-	output logic [4:0]		    FMAFlgM);   // FMA flags
-	
-  //fma/mult/add	
-      //  fmadd  = 000
-      //  fmsub  = 001
-      //  fnmsub = 010	-(a*b)+c
-      //  fnmadd = 011  -(a*b)-c
-      //  fmul   = 100
-      //  fadd   = 110
-      //  fsub   = 111
-
-    // signals transfered between pipeline stages
-    logic [3*`NF+5:0]	SumE, SumM;                       
-    logic [`NE+1:0]	    ProdExpE, ProdExpM;
-    logic 			    AddendStickyE, AddendStickyM;
-    logic 			    KillProdE, KillProdM;
-    logic 			    InvZE, InvZM;
-    logic 			    NegSumE, NegSumM;
-    logic 			    ZSgnEffE, ZSgnEffM;
-    logic 			    PSgnE, PSgnM;
-    logic [$clog2(3*`NF+7)-1:0]			NormCntE, NormCntM;
-    logic               Mult;
-    logic               ZDenormM;
-    
-    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-                .XZeroE, .YZeroE, .ZZeroE,
-                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
-                .ProdExpE, .AddendStickyE, .KillProdE); 
-                
-    // E/M pipeline registers
-    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
-    flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-    flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
-                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
-
-    fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
-            .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
-            .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
-            .FMAResM, .FMAFlgM);
-
-endmodule
-      
-
-        //*** in al units before putting into : ? put in a seperate signal
-
-module fma1(
    input logic                 XSgnE, YSgnE, ZSgnE,    // input's signs
    input logic  [`NE-1:0]      XExpE, YExpE, ZExpE,    // biased exponents in B(NE.0) format
    input logic  [`NF:0]        XManE, YManE, ZManE,    // fractions in U(0.NF) format
@ -111,7 +44,7 @@ module fma1(
    output logic                InvZE,          // intert Z
    output logic                ZSgnEffE,       // the modified Z sign
    output logic                PSgnE,          // the product's sign
-    output logic [$clog2(3*`NF+7)-1:0]          NormCntE        // normalization shift cnt
+    output logic [$clog2(3*`NF+7)-1:0]          FmaNormCntE        // normalization shift cnt
    );

    logic [2*`NF+1:0]   ProdManE;           // 1.X frac * 1.Y frac in U(2.2Nf) format
@ -151,7 +84,7 @@ module fma1(
        
    add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
    
-    loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
+    loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE);

    // Choose the positive sum and accompanying LZA result.
    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
@ -332,7 +265,7 @@ endmodule
 module loa( //https://ieeexplore.ieee.org/abstract/document/930098
    input logic  [3*`NF+6:0] A,     // addend
    input logic  [2*`NF+1:0] P,     // product
-    output logic [$clog2(3*`NF+7)-1:0]       NormCntE   // normalization shift count for the positive result
+    output logic [$clog2(3*`NF+7)-1:0]       FmaNormCntE   // normalization shift count for the positive result
    ); 
    
    logic [3*`NF+6:0] T;
@ -360,861 +293,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098



-    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
+    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
  
 endmodule
-
-
-
-
-
-
-
-
-
-module fma2( // post-addition half of the FMA: normalizes the sum, rounds it, and derives the sign, flags and final result
-
-    input logic                             XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]               ZExpM, // Z input exponent
-    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]                   FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic     [`FMTBITS-1:0]          FmtM,       // precision 1 = double 0 = single
-    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
-    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
-    input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic     [3*`NF+5:0]             SumM,       // the positive sum
-    input logic                             NegSumM,    // was the sum negative
-    input logic                             InvZM,      // do you invert Z
-    input logic                             ZDenormM, // is the original precision denormalized
-    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                             PSgnM,      // the product's sign
-    input logic                             Mult,       // multiply operation
-    input logic     [$clog2(3*`NF+7)-1:0]   NormCntM,   // the normalization shift count
-    output logic    [`FLEN-1:0]             FMAResM,    // FMA final result
-    output logic    [4:0]                   FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
-
-
-    // signals passed between the submodules instantiated below
-    logic [`NF-1:0]     ResultFrac; // Result fraction
-    logic [`NE-1:0]     ResultExp;  // Result exponent
-    logic               ResultSgn, ResultSgnTmp;  // Result sign
-    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
-    logic [`NE+1:0]     FullResultExp;  // ResultExp with bits to determine sign and overflow
-    logic [`NF+1:0]     NormSum;        // normalized sum
-    logic               NormSumSticky;  // sticky bit calculated from the normalized sum
-    logic               SumZero;        // is the sum zero
-    logic               ResultDenorm;   // is the result denormalized
-    logic               Sticky, UfSticky;           // Sticky bit
-    logic               CalcPlus1;                  // do you add or subtract one for rounding
-    logic               UfPlus1;                    // do you add one (for determining underflow flag)
-    logic               Invalid,Underflow,Overflow; // flags
-    logic               Guard, Round;   // bits needed to determine rounding
-    logic               UfLSBNormSum;   // bits needed to determine rounding for underflow flag
-    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
-
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Normalization
-    ///////////////////////////////////////////////////////////////////////////////
-    // shifts the sum by the normalization count and produces its exponent and sticky bits
-    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum, 
-            .ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Rounding
-    ///////////////////////////////////////////////////////////////////////////////
-
-    // round to nearest even
-    // round to zero
-    // round to -infinity
-    // round to infinity
-    // round to nearest max magnitude
-
-    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
-        .CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
-
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Sign calculation
-    ///////////////////////////////////////////////////////////////////////////////
-    // ResultSgnTmp is the sign of a nonzero sum; ResultSgn also covers the zero-sum case
-
-    resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .Mult, .ResultSgnTmp, .ResultSgn);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Flags
-    ///////////////////////////////////////////////////////////////////////////////
-    // Invalid/Overflow/Underflow also feed the result selection below; FMAFlgM is the reported flag vector
-    fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
-        .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfLSBNormSum, .Sticky, .UfPlus1,
-        .FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Select the result
-    ///////////////////////////////////////////////////////////////////////////////
-
-    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
-        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
-        .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
-        .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
-
-// *** use NF where needed
-
-endmodule
-
-module resultsign( // selects the sign of the FMA result
-    input logic [2:0]   FrmM,           // rounding mode (only FrmM[1:0] is examined here)
-    input logic         PSgnM, ZSgnEffM,    // the product's sign and the modified Z sign
-    input logic         Underflow,      // underflow indication; disables the cancellation case of ZeroSgn
-    input logic         InvZM,          // do you invert Z
-    input logic         NegSumM,        // was the sum negative
-    input logic         SumZero,        // is the sum zero
-    input logic         Mult,           // multiply operation; disables the cancellation case of ZeroSgn
-    output logic        ResultSgnTmp,   // sign used when the sum is not zero
-    output logic        ResultSgn       // final sign: ZeroSgn when SumZero, otherwise ResultSgnTmp
-);
-
-    logic ZeroSgn;  // sign used when the sum is zero
-    // ResultSgnTmp is declared as an output port above rather than as a local signal
-
-    // Determine the sign if the sum is zero
-    //      if the product and Z signs differ (cancellation, no underflow, not a multiply) then 0 unless FrmM[1:0] == 2'b10 (round to -infinity)
-    //      if multiply then PSgnM
-    //      otherwise PSgnM
-    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
-
-    // is the result negative
-    //  if p - z: is the sum negative
-    //  if -p + z: is the sum positive
-    //  if -p - z: the sum is negative
-    assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
-    assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
-
-endmodule
-
-
-module normalize( // shifts the FMA sum into normalized form and computes its exponent, sticky bits and denormal indication
-    input logic  [3*`NF+5:0]            SumM,       // the positive sum
-    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
-    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
-    input logic  [$clog2(3*`NF+7)-1:0]  NormCntM,   // normalization shift count
-    input logic  [`FMTBITS-1:0]         FmtM,       // precision 1 = double 0 = single
-    input logic                         KillProdM,  // is the product set to zero
-    input logic 			            ZDenormM,   // Z denormal indication (masks ZExpM[0] when the product is killed)
-    input logic                         AddendStickyM,  // the sticky bit calculated from the aligned addend
-    output logic [`NF+1:0]              NormSum,        // normalized sum
-    output logic                        SumZero,        // is the sum zero
-    output logic                        NormSumSticky, UfSticky,    // sticky bits
-    output logic [`NE+1:0]              SumExp,         // exponent of the normalized sum
-    output logic                        ResultDenorm    // is the result denormalized
-);
-    logic [`NE+1:0]             SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
-    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
-    logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
-    logic [3*`NF+8:0]           SumShifted;         // the shifted sum before LZA correction
-    logic [`NE+1:0]             SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
-    logic                       PreResultDenorm;    // is the result denormalized - calculated before LZA correction
-    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Normalization
-    ///////////////////////////////////////////////////////////////////////////////
-    //*** insert bias-bias simplification in fcvt.sv/phone pictures
-    // Determine if the sum is zero
-    assign SumZero = ~(|SumM);
-
-    // calculate the sum's exponent: Z's exponent if the product was killed, otherwise ProdExpM - (NormCntM + 1 - (NF+4))
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
-
-    // convert the sum's exponent into the proper precision (re-bias for the smaller formats; forced to zero when SumExpTmpTmp is zero)
-    if (`FPSIZES == 1) begin
-        assign SumExpTmp = SumExpTmpTmp;
-
-    end else if (`FPSIZES == 2) begin
-        assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: SumExpTmp = SumExpTmpTmp;
-                `FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
-                `FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
-                default: SumExpTmp = `NE+2'bx; // NOTE(review): parses as NE + 2'bx, not an (NE+2)-bit literal; presumably an all-x value was intended - confirm
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin
-        always_comb begin
-            case (FmtM)
-                2'h3: SumExpTmp = SumExpTmpTmp;
-                2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
-                2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
-                2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
-            endcase
-        end
-
-    end
-
-    // determine if the result is denormalized
-    //      SumNLEZ: exponent is at or below zero for format N; SumNGEFL: exponent is still within the fraction length of format N
-    if (`FPSIZES == 1) begin
-        logic Sum0LEZ, Sum0GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
-
-    end else if (`FPSIZES == 2) begin
-        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
-        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
-        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
-
-    end else if (`FPSIZES == 3) begin
-        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
-        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
-        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
-        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
-        always_comb begin
-            case (FmtM)
-                `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
-                `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
-                `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
-                default: PreResultDenorm = 1'bx;
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin
-        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
-        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF  )-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
-        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
-        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
-        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
-        assign Sum3LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
-        assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
-        always_comb begin
-            case (FmtM)
-                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
-                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
-                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
-                2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
-            endcase
-        end
-
-    end
-
-    // 010. when should be 001.
-    //      - shift left one
-    //      - add one from exp
-    //      - if kill prod dont add to exp
-
-    // Determine if the result is denormal
-    // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
-
-    // Determine the shift needed for denormal results
-    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
-    // Normalize the sum
-    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
-    // LZA correction
-    assign LZAPlus1 = SumShifted[3*`NF+7];
-    assign LZAPlus2 = SumShifted[3*`NF+8];
-    // the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
-    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
-    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
-
-    // Calculate the sticky bit (the smaller formats also OR in the NormSum bits that lie below their fraction)
-    if (`FPSIZES == 1) begin
-        assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
-
-    end else if (`FPSIZES == 2) begin
-        // 3*NF+5 - NF1 - 3
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
-        (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
-
-    end else if (`FPSIZES == 3) begin
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
-        (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) | 
-        (|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
-
-    end else if (`FPSIZES == 4) begin        
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
-        (|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | 
-        (|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
-        (|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
-
-    end
-
-    assign UfSticky = AddendStickyM | NormSumSticky;
-
-    // Determine sum's exponent (forced to zero when the sum is zero or the result is denormalized)
-    //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
-    // recalculate if the result is denormalized
-    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
-
-endmodule
-
-module fmaround( // picks the rounding bits for the selected format and adds the rounding increment to {SumExp, fraction}
-    input logic  [`FMTBITS-1:0] FmtM,       // precision 1 = double 0 = single
-    input logic  [2:0]          FrmM,       // rounding mode
-    input logic                 UfSticky,   // sticky bit for underflow calculation
-    input logic  [`NF+1:0]      NormSum,    // normalized sum
-    input logic                 AddendStickyM,  // addend's sticky bit
-    input logic                 NormSumSticky,  // normalized sum's sticky bit
-    input logic                 ZZeroM,         // is Z zero
-    input logic                 InvZM,          // invert Z
-    input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
-    input logic                 ResultSgnTmp,      // the result's sign
-    output logic                CalcPlus1, UfPlus1,  // do you add one to the result (UfPlus1 is the version used for the underflow flag)
-    output logic [`NE+1:0]      FullResultExp,      // ResultExp with bits to determine sign and overflow
-    output logic [`NF-1:0]      ResultFrac,         // Result fraction
-    output logic [`NE-1:0]      ResultExp,          // Result exponent
-    output logic                Sticky,             // sticky bit
-    output logic [`FLEN:0]      RoundAdd,           // how much to add to the result
-    output logic                Round, Guard, UfLSBNormSum // bits needed to calculate rounding; NOTE(review): Guard has no driver in this module
-);
-    logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
-    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic           UfGuard;            // guard bit used to calculate underflow; NOTE(review): never referenced in this module
-    logic           UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract one from the result
-    logic [`NF-1:0] NormSumTruncated;   // the normalized sum trimmed to fit the mantissa
-    logic           UfRound;            // bit just below Round; used for the underflow flag and folded into Sticky
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Rounding
-    ///////////////////////////////////////////////////////////////////////////////
-
-    // round to nearest even
-    //      {Guard, Round, Sticky}
-    //      0xx - do nothing
-    //      100 - tie - Plus1 if result is odd  (LSBNormSum = 1)
-    //          - don't add 1 if a small number was supposed to be subtracted
-    //      101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
-    //      110/111 - Plus1
-
-    //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
-
-    //  round to -infinity
-    //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
-    //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
-
-    //  round to infinity
-    //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
-    //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
-
-    //  round to nearest max magnitude
-    //      {Guard, Round, Sticky}
-    //      0xx - do nothing
-    //      100 - tie - Plus1
-    //          - don't add 1 if a small number was supposed to be subtracted
-    //      101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
-    //      110/111 - Plus1
-
-    if (`FPSIZES == 1) begin
-        // determine the round bit and least significant bit of the result
-        assign Round = NormSum[1];
-        assign LSBNormSum = NormSum[2];
-
-        // used to determine underflow flag
-        assign UfRound = NormSum[0];
-
-    end else if (`FPSIZES == 2) begin
-        //         \/-------------NF---------------,
-        //      |      NF1       | 2 |             |
-        //          '-------NF1------^
-
-        // determine the round bit and least significant bit of the result
-        assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
-        assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
-
-        // used to determine underflow flag
-        assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
-
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: begin
-                    // determine the round bit and least significant bit of the result
-                    Round = NormSum[1];
-                    LSBNormSum = NormSum[2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[0];
-                end
-                `FMT1: begin
-                    // determine the round bit and least significant bit of the result
-                    Round = NormSum[`NF-`NF1+1];
-                    LSBNormSum = NormSum[`NF-`NF1+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`NF1];
-                end
-                `FMT2: begin
-                    // determine the round bit and least significant bit of the result
-                    Round = NormSum[`NF-`NF2+1];
-                    LSBNormSum = NormSum[`NF-`NF2+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`NF2];
-                end
-                default: begin
-                    Round = 1'bx;
-                    LSBNormSum = 1'bx;
-                    UfRound = 1'bx;
-                end
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin
-        always_comb begin
-            case (FmtM)
-                2'h3: begin
-                    // determine the round bit and least significant bit of the result
-                    Round = NormSum[1];
-                    LSBNormSum = NormSum[2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[0];
-                end
-                2'h1: begin
-                    // determine the round bit and least significant bit of the result
-                    Round = NormSum[`NF-`D_NF+1];
-                    LSBNormSum = NormSum[`NF-`D_NF+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`D_NF];
-                end
-                2'h0: begin
-                    // determine the round bit and least significant bit of the result
-                    Round = NormSum[`NF-`S_NF+1];
-                    LSBNormSum = NormSum[`NF-`S_NF+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`S_NF];
-                end
-                2'h2: begin
-                    // determine the round bit and least significant bit of the result
-                    Round = NormSum[`NF-`H_NF+1];
-                    LSBNormSum = NormSum[`NF-`H_NF+2];
-                    // used to determine underflow flag
-                    UfRound = NormSum[`NF-`H_NF];
-                end
-            endcase
-        end
-
-    end
-    // used to determine underflow flag
-    assign UfLSBNormSum = Round;
-    // determine sticky
-    assign Sticky = UfSticky | UfRound;
-
-
-    // Determine if a small number was supposed to be subtracted
-    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
-    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
-
-    always_comb begin
-        // Determine if you add 1
-        case (FrmM)
-            3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
-            3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
-            3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
-            3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
-            default: CalcPlus1 = 1'bx;
-        endcase
-        // Determine if you add 1 (for underflow flag)
-        case (FrmM)
-            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
-            3'b001: UfCalcPlus1 = 0;//round to zero
-            3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
-            3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
-            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
-            default: UfCalcPlus1 = 1'bx;
-        endcase
-        // Determine if you subtract 1
-        case (FrmM)
-            3'b000: CalcMinus1 = 0;//round to nearest even
-            3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
-            3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
-            3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
-            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
-            default: CalcMinus1 = 1'bx;
-        endcase
-
-    end
-
-    // If an answer is exact don't round
-    assign Plus1 = CalcPlus1 & (Sticky | Round);
-    assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
-    assign Minus1 = CalcMinus1 & (Sticky | Round);
-
-    // Compute rounded result: RoundAdd is +1 or -1 placed at the least significant fraction bit of the selected format
-    if (`FPSIZES == 1) begin
-        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
-
-    end else if (`FPSIZES == 2) begin
-        // \/FLEN+1
-        //  | NE+2 |        NF      |
-        //  '-NE+2-^----NF1----^
-        // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
-        assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
-                                Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
-                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
-                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
-                default: RoundAdd = (`FLEN+1)'(0);
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb begin
-            case (FmtM)
-                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
-                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
-                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
-                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
-            endcase
-        end
-
-    end
-    // drop the two low bits of NormSum, then add RoundAdd to the concatenated exponent and fraction so a fraction carry increments the exponent
-    assign NormSumTruncated = NormSum[`NF+1:2];
-    assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
-    assign ResultExp = FullResultExp[`NE-1:0];
-
-
-endmodule
-
-module fmaflags( // computes the FMA exception flags and the flag signals used for result selection
-    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XZeroM, YZeroM,         // inputs are zero
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic  [`NE+1:0]      FullResultExp,          // ResultExp with bits to determine sign and overflow
-    input logic  [`NE+1:0]      SumExp,                 // exponent of the normalized sum
-    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
-    input logic                 Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding; NOTE(review): Guard is not read in this module
-    input logic  [`FMTBITS-1:0] FmtM,                   // precision 1 = double 0 = single
-    output logic                Invalid, Overflow, Underflow, // flags used to select the result
-    output logic [4:0]          FMAFlgM // FMA flags
-);
-    logic               SigNaN;     // is an input a signaling NaN
-    logic               GtMaxExp;   // is exponent greater than the maximum
-    logic               UnderflowFlag, Inexact; // flags
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Flags
-    ///////////////////////////////////////////////////////////////////////////////
-
-
-
-    // Set Invalid flag for following cases:
-    //   1) any input is a signaling NaN
-    //   2) Inf - Inf (unless x or y is NaN)
-    //   3) 0 * Inf
-
-    assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
-    assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
-
-    // Set Overflow flag if the number is too big to be represented
-    //      - Don't set the overflow flag if an overflowed result isn't output
-    if (`FPSIZES == 1) begin
-        assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
-
-    end else if (`FPSIZES == 2) begin
-        assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
-                `FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
-                `FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
-                default: GtMaxExp = 1'bx;
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb begin
-            case (FmtM)
-                2'h3: GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
-                2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
-                2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
-                2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
-            endcase
-        end
-
-    end
-    assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-
-    // Set Underflow flag if the number is too small to be represented in normal numbers
-    //      - Don't set the underflow flag if the result is exact
-    //      - Underflow (from SumExp) is the output used to select the result; UnderflowFlag (from FullResultExp) is what goes into FMAFlgM
-    assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-    //                      exp is negative         result is denorm        exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
-    assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-    // Set Inexact flag if the result is different from what would be output given infinite precision
-    //      - Also set on Overflow or Underflow; never set when any input is NaN or infinity
-    assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-
-    // Combine flags
-    //      - FMA can't set the Divide by zero flag
-    //      - Don't set the underflow flag if the result was rounded up to a normal number
-    assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
-
-endmodule
-
-
-module resultselect(
-    input logic                     XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]       ZExpM, // input exponents
-    input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]           FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic     [`FMTBITS-1:0]  FmtM,       // precision 1 = double 0 = single
-    input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                     ZDenormM, // is the original precision denormalized
-    input logic 		            ZZeroM,
-    input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                     PSgnM,      // the product's sign
-    input logic                     ResultSgn,  // the result's sign
-    input logic                     CalcPlus1,  // rounding bits
-    input logic     [`FLEN:0]       RoundAdd,   // how much to add to the result
-    input logic                     Invalid, Overflow, Underflow,  // flags
-    input logic                     ResultDenorm,       // is the result denormalized
-    input logic     [`NE-1:0]       ResultExp,          // Result exponent
-    input logic     [`NF-1:0]       ResultFrac,         // Result fraction
-    output logic    [`FLEN-1:0]     FMAResM     // FMA final result
-);
-    logic               InfSgn;
-    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
-    assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
-    if (`FPSIZES == 1) begin
-        if(`IEEE754) begin
-            assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-            assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-            assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
-            assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end else begin
-            assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end
-        assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                    {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
-        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
-        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
-        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
-
-    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
-        if(`IEEE754) begin
-            assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
-            assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
-            assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
-            assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end else begin 
-            assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end
-        
-        assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                            {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                        ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
-                                                                                                                            {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
-        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtM)
-                `FMT: begin  
-                    if(`IEEE754) begin
-                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
-                        InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
-                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
-                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
-                    NormResult = {ResultSgn, ResultExp, ResultFrac};
-                end
-                `FMT1: begin  
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
-                        YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
-                        ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
-                        InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
-                                                                                                                                  {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
-                end
-                `FMT2: begin  
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
-                        YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
-                        ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
-                        InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
-                                                                                                                                  {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
-                end
-                default: begin
-                    if(`IEEE754) begin
-                        XNaNResult = (`FLEN)'(0);
-                        YNaNResult = (`FLEN)'(0);
-                        ZNaNResult = (`FLEN)'(0);
-                        InvalidResult = (`FLEN)'(0);
-                    end else begin 
-                        XNaNResult = (`FLEN)'(0);
-                    end
-                    OverflowResult = (`FLEN)'(0);
-                    KillProdResult = (`FLEN)'(0);
-                    UnderflowResult = (`FLEN)'(0);
-                    InfResult = (`FLEN)'(0);
-                    NormResult = (`FLEN)'(0);
-                end
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin 
-        always_comb begin
-            case (FmtM)
-                2'h3: begin  
-                    if(`IEEE754) begin
-                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
-                        InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
-                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
-                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
-                    NormResult = {ResultSgn, ResultExp, ResultFrac};
-                end
-                2'h1: begin  
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
-                        YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
-                        ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
-                        InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
-                                                                                                                                  {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
-                end
-                2'h0: begin  
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
-                        YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
-                        ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
-                        InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
-                                                                                                                                  {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
-                end
-                2'h2: begin  
-                    if(`IEEE754) begin
-                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
-                        YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
-                        ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
-                        InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end else begin 
-                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end
-                    
-                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
-                                                                                                              {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
-
-                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
-                    UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-                    InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
-                    NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
-                end
-            endcase
-        end
-
-    end
-    if(`IEEE754) begin
-        assign FMAResM = XNaNM ? XNaNResult :
-                            YNaNM ? YNaNResult :
-                            ZNaNM ? ZNaNResult :
-                            Invalid ? InvalidResult :
-                            XInfM|YInfM|ZInfM ? InfResult :
-                            KillProdM ? KillProdResult :  
-                            Overflow ? OverflowResult :
-                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                            NormResult;
-    end else begin
-        assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
-                            XInfM|YInfM|ZInfM ? InfResult :
-                            KillProdM ? KillProdResult :  
-                            Overflow ? OverflowResult :
-                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                            NormResult;
-    end
-
-endmodule
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@ -0,0 +1,127 @@
+`include "wally-config.vh"
+
+module fmashiftcalc(   // derives FmaShiftIn/FmaShiftAmt, the converted sum exponent, and the zero/denormal indicators from the FMA sum
+    input logic  [3*`NF+5:0]            SumM,       // the positive sum
+    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
+    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
+    input logic  [$clog2(3*`NF+7)-1:0]  FmaNormCntM,   // normalization shift count
+    input logic  [`FMTBITS-1:0]         FmtM,       // precision 1 = double 0 = single
+    input logic                         KillProdM,  // is the product set to zero
+    input logic                         ZDenormM,   // is the Z input denormalized
+    output logic [`NE+1:0]              ConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    output logic                        SumZero,    // is the sum zero
+    output logic                        PreResultDenorm,    // is the result denormalized - calculated before LZA correction
+    output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // shift amount: normalization shift count + DenormShift
+    output logic [3*`NF+8:0]            FmaShiftIn        // shift input: SumM zero-extended by three bits
+);
+    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
+    logic [`NE+1:0]             NormSumExp;       // the exponent of the normalized sum with the `FLEN bias
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Normalization
+    ///////////////////////////////////////////////////////////////////////////////
+    //*** insert bias-bias simplification in fcvt.sv/phone pictures
+    // Determine if the sum is zero
+    assign SumZero = ~(|SumM);
+
+    // calculate the sum's exponent: Z's exponent (LSB cleared when Z is denormal) if the product is killed, otherwise ProdExpM - FmaNormCntM + `NF + 3
+    assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} + 1 - (`NE+2)'(`NF+4));
+
+    // convert the sum's exponent into the proper precision (swap the bias; an all-zero NormSumExp is forced to stay zero)
+    if (`FPSIZES == 1) begin
+        assign ConvNormSumExp = NormSumExp;
+
+    end else if (`FPSIZES == 2) begin
+        assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb begin
+            case (FmtM)
+                `FMT: ConvNormSumExp = NormSumExp;
+                `FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+                `FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
+                default: ConvNormSumExp = {`NE+2{1'bx}}; // explicit (`NE+2)-bit X; "`NE+2'bx" would parse as the addition `NE + 2'bx
+            endcase
+        end
+
+    end else if (`FPSIZES == 4) begin
+        always_comb begin
+            case (FmtM)
+                2'h3: ConvNormSumExp = NormSumExp;
+                2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
+                2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
+                2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
+            endcase
+        end
+
+    end
+    
+    // determine if the result is denormalized
+    //  - SumNLEZ: the exponent is <= 0 once rebiased for format N; SumNGEFL: it is >= -(fraction length of format N + 2)
+    if (`FPSIZES == 1) begin
+        logic Sum0LEZ, Sum0GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+
+    end else if (`FPSIZES == 2) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
+        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
+
+    end else if (`FPSIZES == 3) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
+        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
+        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
+        always_comb begin
+            case (FmtM)
+                `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+                `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
+                `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
+                default: PreResultDenorm = 1'bx;
+            endcase
+        end
+
+    end else if (`FPSIZES == 4) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
+        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF  )-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
+        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
+        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
+        assign Sum3LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
+        assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
+        always_comb begin
+            case (FmtM)
+                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
+                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
+                2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
+            endcase
+        end
+
+    end
+
+    // 010. when should be 001.
+    //      - shift left one
+    //      - add one from exp
+    //      - if kill prod dont add to exp
+
+    // Determine if the result is denormal
+    // assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero;
+
+    // Determine the shift needed for denormal results
+    //  - if not denorm add 1 to shift out the leading 1
+    assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
+    // set and calculate the shift input and amount
+    assign FmaShiftIn = {3'b0, SumM};
+    assign FmaShiftAmt = FmaNormCntM+DenormShift;
+endmodule
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -45,6 +45,8 @@ module fpu (
  output logic 		   FWriteIntE, // integer register write enables
  output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
  output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
+  output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
+  output logic [1:0]       FResSelW,
  output logic 		   FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
  output logic 		   IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
  output logic [4:0] 	   SetFflagsM        // FPU flags (to privileged unit)
@ -68,24 +70,24 @@ module fpu (
   logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW;             // FP precision 0-single 1-double
   logic 		  FDivStartD, FDivStartE;             // Start division or squareroot
   logic 		  FWriteIntD;                         // Write to integer register
+   logic 		  FWriteIntM;                         // Write to integer register
   logic [1:0] 	  FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
-   logic [1:0] 	  FResultSelD, FResultSelE;           // Select the result written to FP register
-   logic [1:0] 	  FResultSelM, FResultSelW;           // Select the result written to FP register
-   logic [2:0] 	  FOpCtrlD, FOpCtrlE;       // Select which opperation to do in each component
-   logic [1:0] 	  FResSelD, FResSelE;       // Select one of the results that finish in the memory stage
-   logic [1:0] 	  FIntResSelD, FIntResSelE;           // Select the result written to the integer resister
+   logic [2:0] 	  FOpCtrlD, FOpCtrlE, FOpCtrlM;       // Select which opperation to do in each component
+   logic [1:0] 	  FResSelD, FResSelE, FResSelM;       // Select one of the results that finish in the memory stage
+   logic [1:0] 	  PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
   logic [4:0] 	  Adr1E, Adr2E, Adr3E;                // adresses of each input

   // regfile signals
   logic [`FLEN-1:0] 	  FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
   logic [`FLEN-1:0] 	  FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
   logic [`FLEN-1:0] 	  FSrcXE;                             // Input 1 to the various units (after forwarding)
+   logic [`XLEN-1:0] 	  IntSrcXE;                             // Input 1 to the various units (after forwarding)
   logic [`FLEN-1:0] 	  FPreSrcYE, FSrcYE;                  // Input 2 to the various units (after forwarding)
   logic [`FLEN-1:0] 	  FPreSrcZE, FSrcZE;                  // Input 3 to the various units (after forwarding)

   // unpacking signals
   logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
-   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
+   logic 		  XSgnM;                       // input's sign - memory stage
   logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
   logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
   logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
@ -95,7 +97,7 @@ module fpu (
   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
   logic 		  XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
   logic 		  XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
-   logic 		  XDenormE, ZDenormE;       // is the input denormalized
+   logic 		  XDenormE, ZDenormE, ZDenormM;       // is the input denormalized
   logic 		  XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
   logic 		  XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
   logic 		  XZeroQ, YZeroQ;                     // is the input zero - divide
@ -104,24 +106,43 @@ module fpu (
   logic 		  XInfQ, YInfQ;                       // is the input infinity - divide
   logic 		  XExpMaxE;                           // is the exponent all ones (max value)
   logic 		  FmtQ;
-   logic 		  FOpCtrlQ;     
+   logic 		  FOpCtrlQ;   
+
+   // Fma Signals
+    logic [3*`NF+5:0]	SumE, SumM;                       
+    logic [`NE+1:0]	    ProdExpE, ProdExpM;
+    logic 			    AddendStickyE, AddendStickyM;
+    logic 			    KillProdE, KillProdM;
+    logic 			    InvZE, InvZM;
+    logic 			    NegSumE, NegSumM;
+    logic 			    ZSgnEffE, ZSgnEffM;
+    logic 			    PSgnE, PSgnM;
+    logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;
+
+   // Cvt Signals
+    logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
+	 logic [`LOGLGLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
+    logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
+    logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
+    logic                   IntZeroE, IntZeroM;      // is the integer zero?
+    logic [`LGLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)

   // result and flag signals
   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
   logic [4:0] 	  FDivFlgM;                 // divide/squareroot flags  
-   logic [`FLEN-1:0] 	  FMAResM, FMAResW;                   // FMA/multiply result
-   logic [4:0] 	  FMAFlgM;                   // FMA/multiply result	
   logic [`FLEN-1:0] 	  ReadResW;                           // read result (load instruction)
-   logic [`FLEN-1:0] 	  CvtResE;                   // FP <-> int convert result
-   logic [`XLEN-1:0] CvtIntResE;                   // FP <-> int convert result
-   logic [4:0] 	  CvtFlgE;                   // FP <-> int convert flags //*** trim this	
   logic [`XLEN-1:0] 	  ClassResE;               // classify result
-   logic [`FLEN-1:0] 	  CmpResE;                   // compare result
-   logic 		  CmpNVE;                     // compare invalid flag (Not Valid)     
+   logic [`XLEN-1:0] 	  FIntResE;               // classify result
+   logic [`FLEN-1:0] 	  FpResM, FpResW;               // classify result
+   logic [`FLEN-1:0] 	  PostProcResM;               // classify result
+   logic [4:0] 	  PostProcFlgM;               // classify result
+   logic [`XLEN-1:0] FCvtIntResM; 
+   logic [`FLEN-1:0] 	  CmpFpResE;                   // compare result
+   logic [`XLEN-1:0] 	  CmpIntResE;                   // compare result
+   logic 		           CmpNVE;                     // compare invalid flag (Not Valid)     
   logic [`FLEN-1:0] 	  SgnResE;                   // sign injection result
-   logic [`FLEN-1:0] 	  FResE, FResM, FResW;                // selected result that is ready in the memory stage
-   logic [4:0] 	  FFlgE, FFlgM;                       // selected flag that is ready in the memory stage     
-   logic [`XLEN-1:0] 	  FIntResE;     
+   logic [`FLEN-1:0] 	  PreFpResE, PreFpResM, PreFpResW;                // selected result that is ready in the memory stage
+   logic  	        PreNVE, PreNVM;                       // selected flag that is ready in the memory stage     
   logic [`FLEN-1:0] 	  FPUResultW;                         // final FP result being written to the FP register     
   // other signals
   logic 		  FDivSqrtDoneE;                      // is divide done
@ -133,10 +154,20 @@ module fpu (
   
   // DECODE STAGE

+   //////////////////////////////////////////////////////////////////////////////////////////
+   //          |||||||||||
+   //          |||      |||
+   //          |||       |||
+   //          |||       |||
+   //          |||       |||
+   //          |||      |||
+   //          |||||||||||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
   // calculate FP control signals
   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
-      .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, 
-      .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
+      .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, 
+      .FmtD, .FrmD, .FWriteIntD);

   // FP register file
   fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -150,20 +181,31 @@ module fpu (
   flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                           {Adr1E, Adr2E, Adr3E});
-   flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE, 
-               {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
-               {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
+   flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+               {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
+               {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});

   // EXECUTION STAGE
+   
+   //////////////////////////////////////////////////////////////////////////////////////////
+   //          ||||||||||||
+   //          |||
+   //          |||       
+   //          |||||||||
+   //          |||     
+   //          |||      
+   //          ||||||||||||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
   // Hazard unit for FPU  
   //    - determines if any forwarding or stalls are needed
-   fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, 
+   fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, 
                  .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);

   // forwarding muxs
-   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
-   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
-   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
+   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, FpResM, FForwardXE, FSrcXE);
+   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, FpResM, FForwardYE, FPreSrcYE);
+   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, FpResM, FForwardZE, FPreSrcZE);


   generate
@ -178,7 +220,7 @@ module fpu (
   endgenerate


-   mux2  #(`FLEN)  fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
+   mux2  #(`FLEN)  fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
   
   // Force Z to be 0 for multiply instructions 
   generate
@ -201,21 +243,12 @@ module fpu (
         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, 
         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
-
-   // FMA
-   //   - two stage FMA
-   //   - execute stage - multiplication and addend shifting
-   //   - memory stage  - addition and rounding
-   //   - handles FMA and multiply instructions
-   fma fma (.clk, .reset, .FlushM, .StallM, 
-      .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-      .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
-      .XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, 
-      .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
-      .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-      .FOpCtrlE,
-      .FmtE, .FmtM, .FrmM, 
-      .FMAFlgM, .FMAResM);
+   
+   // fma - does multiply, add, and multiply-add instructions 
+   fma fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, 
+            .XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, 
+            .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE, 
+            .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); 

   // fpdivsqrt using Goldschmidt's iteration
   if(`FLEN == 64) begin 
@ -245,11 +278,14 @@ module fpu (

   // other FP execution units
   fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
-            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
+            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
   fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
   fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
-   fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
-              .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
+
+   fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, 
+              .FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE, 
+              .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .IntZeroE, 
+              .CvtLzcInE);

   // data to be stored in memory - to IEU
   //    - FP uses NaN-blocking format
@ -269,16 +305,16 @@ module fpu (
                             {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
   endgenerate
   // select a result that may be written to the FP register
-   mux4  #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
-   mux4  #(5)  FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
+   mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
+   assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);

   // select the result that may be written to the integer register - to IEU
   if (`FLEN>`XLEN) 
-      mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE, 
-                  CvtIntResE, FIntResSelE, FIntResE);
+      assign IntSrcXE = FSrcXE[`XLEN-1:0];
   else 
-      mux4  #(`XLEN)  IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE, 
-                  CvtIntResE, FIntResSelE, FIntResE);
+      assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
+
+   mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
   // *** DH 5/25/22: CvtRes will move to mem stage.  Premux in execute to save area, then make sure stalls are ok
   // *** make sure the fpu matches the chapter diagram

@ -286,33 +322,68 @@ module fpu (

   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
-   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
+   flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
-   flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM, 
-            {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
-            {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});     
-   flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); 
-   flopenrc #(5)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);      
-   flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
-   flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
-               {FRegWriteE, FResultSelE, FrmE, FmtE},
-               {FRegWriteM, FResultSelM, FrmM, FmtM});
+   flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
+   flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
+   flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, 
+            {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
+            {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});     
+   flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
+   flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
+               {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
+               {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
+   flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
+   flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
+   flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, 
+                           {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
+                           {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
+   flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
+                           {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
+                           {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});

   // BEGIN MEMORY STAGE

+   //////////////////////////////////////////////////////////////////////////////////////////
+   //          |||         |||
+   //          ||||||   ||||||
+   //          ||| ||| ||| |||
+   //          |||  |||||  |||
+   //          |||   |||   |||
+   //          |||         |||
+   //          |||         |||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
+   postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+                           .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, 
+                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, 
+                           .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, 
+                           .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, 
+                           .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
+
   // FPU flag selection - to privileged
-   mux4  #(5)  FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
+   mux2  #(5)  FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
+   mux2  #(`FLEN)  FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM);

   // M/W pipe registers
-   flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
-   flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); 
-   flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
+   flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); 
+   flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
   flopenrc #(4+int'(`FMTBITS-1))  MWCtrlReg(clk, reset, FlushW, ~StallW,
-            {FRegWriteM, FResultSelM, FmtM},
-            {FRegWriteW, FResultSelW, FmtW});
+            {FRegWriteM, FResSelM, FmtM},
+            {FRegWriteW, FResSelW, FmtW});

   // BEGIN WRITEBACK STAGE

+   //////////////////////////////////////////////////////////////////////////////////////////
+   //         |||           |||
+   //         |||           |||
+   //         |||    |||    |||
+   //         |||   |||||   |||
+   //         |||  ||| |||  |||
+   //          ||||||   ||||||
+   //          |||         |||
+   //////////////////////////////////////////////////////////////////////////////////////////
+
   // put ReadData into NaN-blocking format
   //    - if there are any unsused bits the most significant bits are filled with 1s
   //    - for load instruction
@ -328,6 +399,6 @@ module fpu (
   endgenerate

   // select the result to be written to the FP register
-   if(`FLEN>=64)
-   mux4  #(`FLEN)  FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
+   mux2  #(`FLEN)  FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
+
 endmodule // fpu
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@ -0,0 +1,29 @@
+`include "wally-config.vh"
+
+module lzacorrection( // corrects the normalization shift by the top bits of Shifted and adjusts the sum exponent to match
+    input logic  [`NORMSHIFTSZ-1:0]     Shifted,         // the shifted sum before LZA correction
+    input logic                         FmaOp,           // 1: use the LZA-corrected sum, 0: pass the top `CORRSHIFTSZ bits of Shifted through
+    input logic  [`NE+1:0]              ConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic                         PreResultDenorm,    // is the result denormalized - calculated before LZA correction
+    input logic                         KillProdM,  // is the product set to zero
+    input logic                         SumZero,    // when set, SumExp is forced to zero
+    output logic  [`CORRSHIFTSZ-1:0]    CorrShifted,         // the shifted sum after LZA correction
+    output logic [`NE+1:0]              SumExp         // exponent of the normalized sum
+);
+    logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
+    logic                        ResDenorm;    // is the result denormalized
+    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
+
+    // LZA correction - the two most significant bits of Shifted select the correction
+    assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
+    assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
+	// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum, so we can leave the bottom statement alone. NOTE(review): both slices are `NORMSHIFTSZ-3 bits wide while CorrSumShifted is 3*`NF+6 bits; they match only if `NORMSHIFTSZ == 3*`NF+9 - confirm
+    assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
+    assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    // Determine sum's exponent - every increment is gated by ~KillProdM, and the total is masked to zero when SumZero or ResDenorm is set
+    //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
+    assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
+    // recalculate if the result is denormalized - still denormal only if neither Shifted[`NORMSHIFTSZ-3] nor Shifted[`NORMSHIFTSZ-2] is set
+    assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
+
+endmodule
--- a/pipelined/src/fpu/normshift.sv
+++ b/pipelined/src/fpu/normshift.sv
@ -0,0 +1,46 @@
+`include "wally-config.vh"
+
+
+ // convert shift
+    //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if necessary | << CalcExp
+    //          process:
+    //              - start - CalcExp = 1 + XExp - Largest Bias
+    //                  |  `XLEN  zeros     |     Mantissa      | 0's if necessary |
+    //
+    //              - shift left 1 (1)
+    //                  | `XLEN-1 zeros |bit|     frac      | 0's if necessary |
+    //                                      . <- binary point
+    //
+    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
+    //                  |  0's |     Mantissa      |      0's if necessary     |
+    //                  |     keep          |
+    //
+    //      fp -> fp:
+    //          - if result is denormalized or underflowed:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | << NF+CalcExp-1
+    //          process:
+    //             - start
+    //                 |     mantissa      | 0's |
+    //
+    //             - shift right by NF-1 (NF-1)
+    //                 |  `NF-1  zeros   |     mantissa      | 0's |
+    //
+    //             - shift left by CalcExp = XExp - Largest bias + new bias
+    //                 |   0's  |     mantissa      |     0's      |
+    //                 |       keep      |
+    //
+    //          - if the input is denormalized:
+    //              |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+    //
+    //      int -> fp: |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+
+module normshift( // combinational left shifter used for normalization
+    input logic  [$clog2(`NORMSHIFTSZ)-1:0]      ShiftAmt,   // normalization shift count
+    input logic  [`NORMSHIFTSZ-1:0]              ShiftIn,        // value to be shifted
+    output logic [`NORMSHIFTSZ-1:0]             Shifted        // ShiftIn shifted left by ShiftAmt (bits shifted past the MSB are dropped)
+);
+    assign Shifted = ShiftIn << ShiftAmt;
+
+endmodule
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@ -0,0 +1,203 @@
+///////////////////////////////////////////
+//
+// Written: Katherine Parry, David Harris
+// Modified: 6/23/2021
+//
+// Purpose: Floating point post-processing (normalization shift, rounding, sign, flags, result select)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module postprocess(
+    input logic                             XSgnM,  // input signs
+    input logic     [`NE-1:0]               ZExpM, // input exponents
+    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]                   FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [`FMTBITS-1:0]          FmtM,       // precision 1 = double 0 = single
+    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
+    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
+    input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
+    input logic     [3*`NF+5:0]             SumM,       // the positive sum
+    input logic                             NegSumM,    // was the sum negative
+    input logic                             InvZM,      // do you invert Z
+    input logic                             ZDenormM, // is the original precision denormalized
+    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                             PSgnM,      // the product's sign
+    input logic [2:0]                       FOpCtrlM,       // choose which operation (look below for values)
+    input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
+    input logic [`NE:0]           CvtCalcExpM,    // the calculated exponent
+    input logic CvtResDenormUfM,
+	input logic [`LOGLGLEN-1:0] CvtShiftAmtM,  // how much to shift by
+    input logic                   CvtResSgnM,     // the result's sign
+    input logic             FWriteIntM,     // is fp->int (since it's writing to the integer register)
+    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic             IntZeroM,         // is the input zero
+    input logic [1:0] PostProcSelM, // select the operation being post-processed: 10 = fma, 00 = cvt, 01 = div
+    output logic    [`FLEN-1:0]    PostProcResM,    // post-processed floating-point result
+    output logic    [4:0]          PostProcFlgM,    // flags produced by the flags unit
+    output logic [`XLEN-1:0] FCvtIntResM    // the int conversion result
+    );
+    // Shared post-processing: picks the shifter operands for the operation named by
+    // PostProcSelM, runs them through normshift and lzacorrection, then instantiates the
+    // round, resultsign, flags and resultselect units to produce the result and flags.
+    logic [`NF-1:0]     ResFrac; // Result fraction
+    logic [`NE-1:0]     ResExp;  // Result exponent
+    logic  [`CORRSHIFTSZ-1:0]    CorrShifted;         // the shifted sum after LZA correction
+    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
+    logic [`NE+1:0]     FullResExp;  // ResExp with bits to determine sign and overflow
+    logic               SumZero;        // is the sum zero
+    logic               Sticky;           // Sticky bit
+    logic [3*`NF+8:0]            FmaShiftIn;        // FMA operand for the normalization shifter
+    logic               UfPlus1;                    // do you add one (for determining underflow flag)
+    logic               Round;   // bits needed to determine rounding
+    logic [`LGLEN+`NF:0]    CvtShiftIn;    // number to be shifted
+    logic               Mult;       // multiply operation
+    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
+    logic [`NE+1:0]     ConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA correction
+    logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt;   // normalization shift count
+    logic [$clog2(`NORMSHIFTSZ)-1:0]  ShiftAmt;   // normalization shift count
+    logic [`NORMSHIFTSZ-1:0]     ShiftIn;        // shifter input - sized to match normshift's port so the padded operands below are never truncated
+    logic [`NORMSHIFTSZ-1:0]    Shifted;    // the shifted result
+    logic                   Plus1;      // add one to the final result?
+    logic                   Overflow, Underflow, Invalid; // flags
+    logic                   Signed;     // is the operation with a signed integer?
+    logic                   Int64;      // is the integer 64 bits?
+    logic                   IntToFp;       // is the operation an int->fp conversion?
+    logic                   ToInt;      // is the operation an fp->int conversion?
+    logic [`NE+1:0] RoundExp;
+    logic [1:0] NegResMSBS;
+    logic CvtOp;
+    logic FmaOp;
+    logic CvtResUf;
+    logic DivOp;
+    logic InfIn;
+    logic ResSgn;
+    logic NaNIn;
+    logic UfLSBRes;
+    logic Sqrt;
+    logic [`FMTBITS-1:0] OutFmt;
+
+    // signals to help readability
+    assign Signed = FOpCtrlM[0];
+    assign Int64 =  FOpCtrlM[1];
+    assign IntToFp =   FOpCtrlM[2];
+    assign ToInt =  FWriteIntM;
+    assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
+    assign CvtOp = (PostProcSelM == 2'b00);
+    assign FmaOp = (PostProcSelM == 2'b10);
+    assign DivOp = (PostProcSelM == 2'b01);
+    assign Sqrt = FOpCtrlM[0];
+
+    // is there an input of infinity or NaN being used
+    assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
+    assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp);
+
+    // choose the output format depending on the operation
+    //      - fp -> fp: OpCtrl contains the precision of the output
+    //      - otherwise: FmtM contains the precision of the output
+    if (`FPSIZES == 2) 
+        assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT); 
+    else if (`FPSIZES == 3 | `FPSIZES == 4) 
+        assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0]; 
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Normalization
+    ///////////////////////////////////////////////////////////////////////////////
+
+    cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,  
+                              .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
+    fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
+                          .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+
+    always_comb
+        case(PostProcSelM)
+            2'b10: begin // fma
+                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
+                ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
+            end
+            2'b00: begin // cvt
+                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
+                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
+            end
+            2'b01: begin //div
+                ShiftAmt = 0;//{DivShiftAmt};
+                ShiftIn =  0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
+            end
+            default: begin 
+                ShiftAmt = 0; 
+                ShiftIn = 0; 
+            end
+        endcase
+    
+    normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+    lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
+                                .SumZero, .Shifted, .SumExp, .CorrShifted);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Rounding
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // round to nearest even
+    // round to zero
+    // round to -infinity
+    // round to infinity
+    // round to nearest max magnitude
+
+    round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
+                .InvZM, .ResSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt,  .CvtResUf,
+                .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Sign calculation
+    ///////////////////////////////////////////////////////////////////////////////
+
+    resultsign resultsign(.FrmM, .PSgnM, .PostProcSelM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
+                          .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, .CvtResSgnM, .ResSgn);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Flags
+    ///////////////////////////////////////////////////////////////////////////////
+
+    flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM, 
+                .XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
+                .XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round,
+                .UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
+                .RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Select the result
+    ///////////////////////////////////////////////////////////////////////////////
+
+    resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM,
+        .IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf, 
+        .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
+        .FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
+
+endmodule
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@ -0,0 +1,282 @@
+`include "wally-config.vh"
+
+module resultselect(    // builds every candidate result and selects the final floating-point (PostProcResM) and integer (FCvtIntResM) outputs
+    input logic                     XSgnM,        // X input sign
+    input logic     [`NE-1:0]       ZExpM, // Z input exponent
+    input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]           FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [`FMTBITS-1:0]  OutFmt,       // output format
+    input logic                     InfIn,      // an input is infinity
+    input logic                     XZeroM,     // the floating-point (X) input is zero
+    input logic                     IntZeroM,   // the integer input is zero
+    input logic                     NaNIn,      // selects InvalidRes when IEEE754 is not set (presumably an input is NaN - TODO confirm)
+    input logic                     IntToFp,    // the integer input is being used (presumably an int-to-fp conversion - TODO confirm)
+    input logic                     Int64,      // integer result is 64 bits (long) rather than 32 bits (int)
+    input logic                     Signed,     // integer result is signed rather than unsigned
+    input logic                     CvtOp,      // conversion operation
+    input logic [`NORMSHIFTSZ-1:0]             Shifted,        // shifted value; its upper XLEN bits form the integer result before rounding and negation
+    input logic                     FmaOp,      // FMA operation
+    input logic                     Plus1,      // add one to the result when rounding
+    input logic [`NE:0]             CvtCalcExpM,    // the calculated exponent
+    input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                     ZDenormM, // is the original precision denormalized
+    input logic 		            ZZeroM,
+    input logic                     ResSgn,  // the res's sign
+    input logic     [`FLEN:0]       RoundAdd,   // how much to add to the res
+    input logic                     Invalid, Overflow,  // flags
+    input logic CvtResUf,   // the conversion result underflows
+    input logic     [`NE-1:0]       ResExp,          // Res exponent
+    input logic     [`NE+1:0]       FullResExp,          // Res exponent with two extra upper bits
+    input logic     [`NF-1:0]       ResFrac,         // Res fraction
+    output logic    [`FLEN-1:0]     PostProcResM,     // final res
+    output logic [1:0] NegResMSBS,  // two upper bits of NegRes, picked according to Signed and Int64
+    output logic    [`XLEN-1:0]     FCvtIntResM     // final integer conversion res
+);
+    logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
+    logic OfResMax;     // the overflow result is the maximum normalized number rather than infinity
+    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
+    logic [`XLEN+1:0]       NegRes;     // the negation of the result
+    logic KillRes;      // select UfRes instead of NormRes
+
+
+    // does the overflow result output the maximum normalized floating point number
+    //                output infinity if the input is infinity
+    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
+
+    if (`FPSIZES == 1) begin
+
+        //NaN res selection depending on standard
+        if(`IEEE754) begin
+            assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+            assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+            assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end else begin
+            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end
+
+        assign OfRes =  OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+        assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+        assign NormRes = {ResSgn, ResExp, ResFrac};
+
+    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
+        if(`IEEE754) begin
+            assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+            assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+            assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end else begin 
+            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end
+        
+        assign OfRes =  OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+        assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
+        assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+                    NormRes = {ResSgn, ResExp, ResFrac};
+                end
+                `FMT1: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+                        YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+                        ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end
+                    OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+                    KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
+                end
+                `FMT2: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
+                        YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
+                        ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
+                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
+                    KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
+                end
+                default: begin
+                    if(`IEEE754) begin
+                        XNaNRes = (`FLEN)'(0);
+                        YNaNRes = (`FLEN)'(0);
+                        ZNaNRes = (`FLEN)'(0);
+                        InvalidRes = (`FLEN)'(0);
+                    end else begin 
+                        InvalidRes = (`FLEN)'(0);
+                    end
+                    OfRes = (`FLEN)'(0);
+                    KillProdRes = (`FLEN)'(0);
+                    UfRes = (`FLEN)'(0);
+                    NormRes = (`FLEN)'(0);
+                end
+            endcase
+
+    end else if (`FPSIZES == 4) begin 
+        always_comb
+            case (OutFmt)
+                2'h3: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+                    NormRes = {ResSgn, ResExp, ResFrac};
+                end
+                2'h1: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
+                        YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
+                        ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
+                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end
+                    OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                    KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
+                end
+                2'h0: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
+                        YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
+                        ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
+                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                    KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
+                end
+                2'h2: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
+                        YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
+                        ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
+                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
+
+                    KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
+                    NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
+                end
+            endcase
+
+    end
+
+    
+
+
+
+    // determine if you should kill the res - Cvt
+    //      - do so if the res underflows, is zero (the exp doesn't calculate correctly), or the integer input is 0
+    //      - don't set to zero if fp input is zero but not using the fp input
+    //      - don't set to zero if int input is zero but not using the int input
+    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
+    // select the final floating-point result; conditions earlier in each chain take priority
+    if(`IEEE754) begin
+        assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
+                         YNaNM&~CvtOp ? YNaNRes :
+                         ZNaNM&FmaOp ? ZNaNRes :
+                         Invalid ? InvalidRes : 
+                         Overflow|InfIn ? OfRes :
+                         KillProdM&FmaOp ? KillProdRes : 
+                         KillRes ? UfRes :  
+                         NormRes;
+    end else begin
+        assign PostProcResM = NaNIn|Invalid ? InvalidRes :
+                         Overflow|InfIn ? OfRes :
+                         KillProdM&FmaOp ? KillProdRes :  
+                         KillRes ? UfRes :  
+                         NormRes;
+    end
+
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    //      |||||||||||   |||     |||   |||||||||||||
+    //          |||       ||||||  |||        |||
+    //          |||       ||| ||| |||        |||
+    //          |||       |||  ||||||        |||
+    //      |||||||||||   |||     |||        |||
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////        
+
+    // *** probably can optimize the negation
+    // select the overflow integer res
+    //      - negative infinity and out of range negative input
+    //                 |  int  |  long  |
+    //          signed | -2^31 | -2^63  |
+    //        unsigned |   0   |    0   |
+    //
+    //      - positive infinity and out of range positive input and NaNs
+    //                 |   int  |  long  |
+    //          signed | 2^31-1 | 2^63-1 |
+    //        unsigned | 2^32-1 | 2^64-1 |
+    //
+    //      other: 32 bit unsigned res should be sign extended as if it were a signed number
+    assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negative
+                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
+                               XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negative
+                                              {`XLEN{1'b1}};// unsigned positive
+    
+    // round and negate the positive res if needed
+    assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
+    
+    //*** false critical path probably
+    assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
+			              Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
+
+    // select the integer output
+    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow res
+    //      - if the input underflows
+    //          - if rounding and signed operation and negative input, output -1
+    //          - otherwise output a rounded 0
+    //      - otherwise output the normal res (trimmed and sign extended if necessary)
+    assign FCvtIntResM = Invalid ?  OfIntRes :
+			            CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
+                        Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
+endmodule
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@ -0,0 +1,50 @@
+`include "wally-config.vh"
+
+module resultsign(
+    input logic [2:0]   FrmM,               // rounding mode
+    input logic         PSgnM, ZSgnEffM,    // product sign and effective Z sign
+    input logic         InvZM,              // invert Z
+    input logic         ZInfM,              // Z is infinity
+    input logic         InfIn,              // an input is infinity
+    input logic         NegSumM,            // the sum is negative
+    input logic [1:0] PostProcSelM,         // selects which operation's sign is output (see the case below)
+    input logic [`NE+1:0] SumExp,           // exponent of the sum
+    input logic         SumZero,            // the sum is zero
+    input logic         Mult,               // multiply operation
+    input logic         Round,              // round bit
+    input logic         Sticky,             // sticky bit
+    input logic         CvtResSgnM,         // sign of the conversion result
+    output logic        ResSgn              // sign of the selected result
+);
+
+    logic ExpUf;        // SumExp has its top bit set, or is zero with Round or Sticky set
+    logic CancelSgn;    // sign to use when the sum is zero
+    logic SumSgn;       // sign of the sum worked out from the operand signs and NegSumM
+    logic FiniteSgn;    // FMA sign when no input is infinity
+    logic FmaSgn;       // FMA sign
+
+    // Sign of the sum when it is not zero:
+    //      p - z : negative when the sum is negative
+    //     -p + z : negative when the sum is not negative
+    //     -p - z : always negative
+    assign SumSgn = (PSgnM & ZSgnEffM) | (InvZM & ((NegSumM & ZSgnEffM) | (~NegSumM & PSgnM)));
+
+    // Sign of the sum when it is zero:
+    //      operands of opposite sign, no underflow, not a multiply: negative only if FrmM[1:0] is 2'b10
+    //      anything else: the product's sign
+    assign ExpUf = ((SumExp == 0) & (Sticky | Round)) | SumExp[`NE+1];
+    assign CancelSgn = (~Mult & ~ExpUf & (ZSgnEffM ^ PSgnM)) ? (FrmM[1:0] == 2'b10) : PSgnM;
+
+    // an infinite input decides the sign: Z's sign if Z is the infinity, the product's sign otherwise
+    assign FiniteSgn = SumZero ? CancelSgn : SumSgn;
+    assign FmaSgn = InfIn ? (ZInfM ? ZSgnEffM : PSgnM) : FiniteSgn;
+
+    // pick the sign for the operation selected by PostProcSelM
+    always_comb
+        case(PostProcSelM)
+            2'b00: ResSgn = CvtResSgnM;     // cvt
+            2'b01: ResSgn = 0;              // divide
+            2'b10: ResSgn = FmaSgn;         // fma
+            default: ResSgn = 1'bx;         // remaining select value
+        endcase
+endmodule
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@ -0,0 +1,316 @@
+`include "wally-config.vh"
+// what position is XLEN in?
+//  options: 
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
+
+module round(
+    input logic  [`FMTBITS-1:0] OutFmt,       // precision 1 = double 0 = single
+    input logic  [2:0]          FrmM,       // rounding mode
+    input logic                 FmaOp,
+    input logic [1:0] PostProcSelM,
+    input logic                 CvtResDenormUfM,
+    input logic                 ToInt,
+    input logic                 CvtOp,
+    input logic                 CvtResUf,
+    input logic [`CORRSHIFTSZ-1:0]  CorrShifted,
+    input logic                 AddendStickyM,  // addend's sticky bit
+    input logic                 ZZeroM,         // is Z zero
+    input logic                 InvZM,          // invert Z
+    input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
+    input logic                 ResSgn,      // the result's sign
+    input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
+    output logic                UfPlus1,  // do you add or subtract on from the result
+    output logic [`NE+1:0]      FullResExp,      // ResExp with bits to determine sign and overflow
+    output logic [`NF-1:0]      ResFrac,         // Result fraction
+    output logic [`NE-1:0]      ResExp,          // Result exponent
+    output logic                Sticky,             // sticky bit
+    output logic [`NE+1:0] RoundExp,
+    output logic Plus1,
+    output logic [`FLEN:0]      RoundAdd,           // how much to add to the result
+    output logic                Round, UfLSBRes // bits needed to calculate rounding
+);
+    logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
+    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
+    logic           UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
+    logic                 NormSumSticky;  // normalized sum's sticky bit
+    logic                 UfSticky;   // sticky bit for underlow calculation
+    logic [`NF-1:0] RoundFrac;
+    logic FpRes, IntRes;
+    logic           UfRound;
+    logic           FpRound, FpLSBRes, FpUfRound;
+    logic           CalcPlus1, FpPlus1;
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Rounding
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // round to nearest even
+    //      {Round, Sticky}
+    //      0x - do nothing
+    //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
+    //          - don't add 1 if a small number was supposed to be subtracted
+    //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+    //         - plus 1 otherwise
+
+    //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+    //  round to -infinity
+    //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+    //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+    //  round to infinity
+    //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+    //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+    //  round to nearest max magnitude
+    //      {Guard, Round, Sticky}
+    //      0x - do nothing
+    //      10 - tie - Plus1
+    //          - don't add 1 if a small number was supposed to be subtracted
+    //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+    //         - Plus 1 otherwise
+
+    assign IntRes = CvtOp & ToInt;
+    assign FpRes = ~IntRes;
+
+    // sticky bit calculation
+    if (`FPSIZES == 1) begin
+
+    //     1: XLEN > NF
+    //      |         XLEN          |
+    //      |    NF     |1|1|
+    //                     ^    ^ if floating point result
+    //                     ^ if not an FMA result
+        if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                 (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
+    //     2: NF > XLEN
+        if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
+                                                 (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+
+    end else if (`FPSIZES == 2) begin
+        // XLEN is either 64 or 32
+        // so half and single are always smaller then XLEN
+
+        // 1: XLEN > NF   > NF1
+        if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
+        // 2: NF   > XLEN > NF1
+        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+        // 3: NF   > NF1  > XLEN
+        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+
+    end else if (`FPSIZES == 3) begin
+        // 1: XLEN > NF   > NF1
+        if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
+        // 2: NF   > XLEN > NF1
+        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+        // 3: NF   > NF1  > XLEN
+        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
+
+    end else if (`FPSIZES == 4) begin
+        // Quad precision will always be greater than XLEN
+        // 2: NF   > XLEN > NF1
+        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
+        // 3: NF   > NF1  > XLEN
+        // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
+        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
+
+    end
+    
+
+
+    // only add the Addend sticky if doing an FMA opperation
+    //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
+    assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
+    
+    // determine round and LSB of the rounded value by picking bits of CorrShifted at the fraction width of the output format
+    //      - underflow round bit (one position below the round bit) is used to determine the underflow flag
+    if (`FPSIZES == 1) begin // one format: positions fixed by NF
+        assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
+        assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
+        assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
+
+    end else if (`FPSIZES == 2) begin // two formats: OutFmt set uses the NF positions, clear uses the NF1 positions
+        assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1];
+        assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1];
+        assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2];
+
+    end else if (`FPSIZES == 3) begin // three formats: OutFmt is matched against FMT, FMT1 and FMT2
+        always_comb
+            case (OutFmt)
+                `FMT: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
+                end
+                `FMT1: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2];
+                end
+                `FMT2: begin
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2];
+                end
+                default: begin // OutFmt matches none of the three formats: drive x
+                    FpRound = 1'bx;
+                    FpLSBRes = 1'bx;
+                    FpUfRound = 1'bx;
+                end
+            endcase
+    end else if (`FPSIZES == 4) begin // four formats: every 2-bit OutFmt value is listed, so no default is needed
+        always_comb
+            case (OutFmt)
+                2'h3: begin // positions set by Q_NF
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2];
+                end
+                2'h1: begin // positions set by D_NF
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2];
+                end
+                2'h0: begin // positions set by S_NF
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2];
+                end
+                2'h2: begin // positions set by H_NF
+                    FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1];
+                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF];
+                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2];
+                end
+            endcase
+    end
+
+    assign Round = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound; // conversions to int take the bits at the XLEN boundary instead of the fp ones
+    assign LSBRes = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
+    assign UfRound = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
+
+    // used to determine underflow flag: the LSB for the underflow check is the fp round bit
+    assign UfLSBRes = FpRound;
+    // determine sticky: the underflow sticky ORed with the underflow round bit
+    assign Sticky = UfSticky | UfRound;
+
+
+    // Determine if a small number was supposed to be subtracted - for FMA calculation only
+    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
+    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp; // underflow variant: UfRound is not part of the exclusion term
+
+    always_comb begin
+        // Determine if you add 1 (one case arm per FrmM rounding mode)
+        case (FrmM)
+            3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
+            3'b001: CalcPlus1 = 0;//round to zero
+            3'b010: CalcPlus1 = ResSgn & ~(SubBySmallNum & ~Round);//round down
+            3'b011: CalcPlus1 = ~ResSgn & ~(SubBySmallNum & ~Round);//round up
+            3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
+            default: CalcPlus1 = 1'bx; // any other FrmM value: drive x
+        endcase
+        // Determine if you add 1 (for underflow flag) - same structure as above using the Uf* round, sticky and LSB signals
+        case (FrmM)
+            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
+            3'b001: UfCalcPlus1 = 0;//round to zero
+            3'b010: UfCalcPlus1 = ResSgn & ~(UfSubBySmallNum & ~UfRound);//round down
+            3'b011: UfCalcPlus1 = ~ResSgn & ~(UfSubBySmallNum & ~UfRound);//round up
+            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
+            default: UfCalcPlus1 = 1'bx; // any other FrmM value: drive x
+        endcase
+        // Determine if you subtract 1 (only possible when SubBySmallNum is set and Round is clear)
+        case (FrmM)
+            3'b000: CalcMinus1 = 0;//round to nearest even
+            3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
+            3'b010: CalcMinus1 = ~ResSgn & ~Round & SubBySmallNum;//round down
+            3'b011: CalcMinus1 = ResSgn & ~Round & SubBySmallNum;//round up
+            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
+            default: CalcMinus1 = 1'bx; // any other FrmM value: drive x
+        endcase
+   
+    end
+
+    // If an answer is exact (neither Sticky nor Round set) don't round
+    assign Plus1 = CalcPlus1 & (Sticky | Round);
+    assign FpPlus1 = Plus1&~(ToInt&CvtOp); // masked off for conversions to int
+    assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky
+    assign Minus1 = CalcMinus1 & (Sticky | Round);
+
+    // Compute the value added during rounding: all ones when Minus1 is set, otherwise FpPlus1 placed at the LSB of the output format
+    if (`FPSIZES == 1) begin
+        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
+
+    end else if (`FPSIZES == 2) begin
+        // \/FLEN+1
+        //  | NE+2 |        NF      |
+        //  '-NE+2-^----NF1----^
+        // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 (number of low bits left as zero for the narrower format)
+        assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
+                                   Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb begin
+            case (OutFmt)
+                `FMT:  RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
+                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
+                default: RoundAdd = (`FLEN+1)'(0); // OutFmt matches none of the three formats: add nothing
+            endcase
+        end
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb begin
+            case (OutFmt)
+                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; // full width: no zero padding below the increment
+                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
+                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
+                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
+            endcase
+        end
+
+    end
+
+    // determine the result to be rounded
+    assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+    
+    always_comb
+        case(PostProcSelM)
+            2'b10: RoundExp = SumExp; // fma
+            2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt: top bit replicated, whole exponent forced to zero when the mask term is 0
+            2'b01: RoundExp = 0; // divide
+            default: RoundExp = 0; // any other PostProcSelM value
+        endcase
+
+    // round the result
+    //      - if the fraction overflows one should be added to the exponent (the carry propagates because exponent and fraction are added as one concatenated value)
+    assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd;
+    assign ResExp = FullResExp[`NE-1:0]; // keep only the low NE bits of the rounded exponent
+
+
+endmodule
--- a/pipelined/src/ieu/datapath.sv
+++ b/pipelined/src/ieu/datapath.sv
@ -61,6 +61,8 @@ module datapath (
 (* mark_debug = "true" *)  input  logic             RegWriteW, 
  input  logic             SquashSCW,
  input  logic [2:0]       ResultSrcW,
+  input logic [`XLEN-1:0]  FCvtIntResW,
+  input logic [1:0]        FResSelW,
  output logic [`XLEN-1:0] ReadDataW,
  // input  logic [`XLEN-1:0] PCLinkW,
  input  logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW, 
@ -120,14 +122,17 @@ module datapath (
  flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
  flopenrc #(5)     RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
  flopen #(`XLEN)   ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
-  mux5  #(`XLEN)    resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 

  // floating point interactions: fcvt, fp stores
  if (`F_SUPPORTED) begin:fpmux
+    logic [`XLEN-1:0] IFCvtResultW;
    mux2  #(`XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
    mux2  #(`XLEN)  writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
+    mux2  #(`XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
+    mux5  #(`XLEN)    resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 
  end else begin:fpmux
    assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
+    mux5  #(`XLEN)    resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 
  end

  // handle Store Conditional result if atomic extension supported
--- a/pipelined/src/ieu/ieu.sv
+++ b/pipelined/src/ieu/ieu.sv
@ -61,6 +61,8 @@ module ieu (

  // Writeback stage
  input logic [`XLEN-1:0]  CSRReadValW, ReadDataM, MDUResultW,
+  input logic [1:0]        FResSelW,
+  input logic [`XLEN-1:0]  FCvtIntResW,
  output logic [4:0]       RdW,
  output logic [`XLEN-1:0] ReadDataW,
  // input  logic [`XLEN-1:0] PCLinkW,
@ -105,8 +107,8 @@ module ieu (
    .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
    .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
    .FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, 
-    .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE,
-    .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW,
+    .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
+    .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
    .CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);             
  
  forward    fw(
--- a/pipelined/src/ifu/ifu.sv
+++ b/pipelined/src/ifu/ifu.sv
@ -38,9 +38,13 @@ module ifu (
 	// Bus interface
 (* mark_debug = "true" *)	input logic [`XLEN-1:0] 	IFUBusHRDATA,
 (* mark_debug = "true" *)	input logic 				IFUBusAck,
+(* mark_debug = "true" *)	input logic 				IFUBusInit,
 (* mark_debug = "true" *)	output logic [`PA_BITS-1:0] IFUBusAdr,
 (* mark_debug = "true" *)	output logic 				IFUBusRead,
 (* mark_debug = "true" *)	output logic 				IFUStallF,
+(* mark_debug = "true" *) output logic [2:0]  IFUBurstType,
+(* mark_debug = "true" *) output logic [1:0]  IFUTransType,
+(* mark_debug = "true" *) output logic        IFUTransComplete,
 	(* mark_debug = "true" *) output logic [`XLEN-1:0] PCF, 
 	// Execute
 	output logic [`XLEN-1:0] 	PCLinkE,
@ -201,8 +205,8 @@ module ifu (
    
    busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) 
    busdp(.clk, .reset,
-          .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusWriteCrit(),
-          .LSUBusRead(IFUBusRead), .LSUBusSize(), 
+          .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
+          .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
          .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr),
          .WordCount(), 
          .DCacheFetchLine(ICacheFetchLine),
--- a/pipelined/src/lsu/busdp.sv
+++ b/pipelined/src/lsu/busdp.sv
@ -40,9 +40,13 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
  // bus interface
  input logic [`XLEN-1:0]     LSUBusHRDATA,
  input logic                 LSUBusAck,
+  input logic                 LSUBusInit,
  output logic                LSUBusWrite,
  output logic                LSUBusRead,
-  output logic [2:0]          LSUBusSize, 
+  output logic [2:0]          LSUBusSize,
+  output logic [2:0]          LSUBurstType,
+  output logic [1:0]          LSUTransType, // For AHBLite
+  output logic                LSUTransComplete,
  input logic [2:0]           LSUFunct3M,
  output logic [`PA_BITS-1:0] LSUBusAdr, // ** change name to HADDR to make ahb lite.
  output logic [LOGWPL-1:0]   WordCount,
@ -66,13 +70,15 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
  
  localparam integer   WordCountThreshold = CACHE_ENABLED ? WORDSPERLINE - 1 : 0;
  logic [`PA_BITS-1:0]        LocalLSUBusAdr;
+  logic [LOGWPL-1:0]   WordCountDelayed;
+

  // *** implement flops as an array if feasbile; DCacheBusWriteData might be a problem
  // *** better name than DCacheBusWriteData
  genvar                      index;
  for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
    logic [WORDSPERLINE-1:0] CaptureWord;
-    assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCount);
+    assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed);
    flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA),
      .q(DCacheBusWriteData[(index+1)*`XLEN-1:index*`XLEN]));
  end
@ -83,6 +89,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)

  busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm(
    .clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
-		.LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
-		.DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount);
+		.LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
+		.LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed);
 endmodule
--- a/pipelined/src/lsu/busfsm.sv
+++ b/pipelined/src/lsu/busfsm.sv
@ -41,6 +41,7 @@ module busfsm #(parameter integer   WordCountThreshold,
   input logic               DCacheFetchLine,
   input logic               DCacheWriteLine,
   input logic               LSUBusAck,
+   input logic               LSUBusInit, // This might be better as LSUBusLock, or to send this using LSUBusAck.
   input logic               CPUBusy,
   input logic               CacheableM,

@ -48,10 +49,13 @@ module busfsm #(parameter integer   WordCountThreshold,
   output logic              LSUBusWrite,
   output logic              LSUBusWriteCrit,
   output logic              LSUBusRead,
+   output logic [2:0]        LSUBurstType,
+   output logic              LSUTransComplete,
+   output logic [1:0]        LSUTransType,
   output logic              DCacheBusAck,
   output logic              BusCommittedM,
   output logic              SelUncachedAdr,
-   output logic [LOGWPL-1:0] WordCount);
+   output logic [LOGWPL-1:0] WordCount, WordCountDelayed);
  

  
@ -61,7 +65,8 @@ module busfsm #(parameter integer   WordCountThreshold,
  logic 			   CntReset;
  logic 			   WordCountFlag;
  logic [LOGWPL-1:0]   NextWordCount;
-  logic 			   UnCachedAccess;
+  logic 			   UnCachedAccess, UnCachedRW;
+  logic [2:0]    LocalBurstType;
  

  typedef enum logic [2:0] {STATE_BUS_READY,
@ -75,18 +80,27 @@ module busfsm #(parameter integer   WordCountThreshold,

  (* mark_debug = "true" *) busstatetype BusCurrState, BusNextState;

-
+  // Used to send address for address stage of AHB.
  flopenr #(LOGWPL) 
  WordCountReg(.clk(clk),
 		.reset(reset | CntReset),
 		.en(CntEn),
 		.d(NextWordCount),
-		.q(WordCount));
+		.q(WordCount));  
+  
+  // Used to store data from data phase of AHB.
+  flopenr #(LOGWPL) 
+  WordCountDelayedReg(.clk(clk),
+		.reset(reset | CntReset),
+		.en(CntEn),
+		.d(WordCount),
+		.q(WordCountDelayed));

  assign NextWordCount = WordCount + 1'b1;

-  assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]);
-  assign CntEn = PreCntEn & LSUBusAck;
+  assign PreCntEn = (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_WRITE);
+  assign WordCountFlag = (WordCountDelayed == WordCountThreshold[LOGWPL-1:0]); // Detect when we are waiting on the final access.
+  assign CntEn = (PreCntEn & LSUBusAck | (LSUBusInit)) & ~WordCountFlag & ~UnCachedRW; // Want to count when doing cache accesses and we aren't wrapping up.

  assign UnCachedAccess = ~CACHE_ENABLED | ~CacheableM;

@ -120,14 +134,29 @@ module busfsm #(parameter integer   WordCountThreshold,
 	endcase
  end

+  always_comb begin // pick the burst-type encoding from the WordCountThreshold parameter
+    case(WordCountThreshold)
+      0:        LocalBurstType = 3'b000; // no burst
+      3:        LocalBurstType = 3'b011; // INCR4
+      7:        LocalBurstType = 3'b101; // INCR8
+      15:       LocalBurstType = 3'b111; // INCR16
+      default:  LocalBurstType = 3'b001; // INCR without end.
+    endcase
+  end

-  assign CntReset = BusCurrState == STATE_BUS_READY;
+  // Would these be better as always_comb statements or muxes?
+  assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access.
+  assign LSUTransComplete = (UnCachedRW) ? LSUBusAck : WordCountFlag & LSUBusAck;
+  // Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up.
+  assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00; 
+  // Reset if we aren't initiating a transaction or if we are finishing a transaction.
+  assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete; 
+  
  assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((UnCachedAccess & (|LSURWM)) | DCacheFetchLine | DCacheWriteLine)) |
 					(BusCurrState == STATE_BUS_UNCACHED_WRITE) |
 					(BusCurrState == STATE_BUS_UNCACHED_READ) |
 					(BusCurrState == STATE_BUS_FETCH)  |
 					(BusCurrState == STATE_BUS_WRITE);
-  assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE;
  assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) |
 							   (BusCurrState == STATE_BUS_UNCACHED_WRITE);
  assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
@ -139,6 +168,10 @@ module busfsm #(parameter integer   WordCountThreshold,
 							  (BusCurrState == STATE_BUS_UNCACHED_READ);
  assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);

+
+  // Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache.
+  assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead; 
+
  assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LSUBusAck) |
 						(BusCurrState == STATE_BUS_WRITE & WordCountFlag & LSUBusAck);
  assign BusCommittedM = BusCurrState != STATE_BUS_READY;
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@ -66,9 +66,13 @@ module lsu (
   (* mark_debug = "true" *)   output logic LSUBusRead, 
   (* mark_debug = "true" *)   output logic LSUBusWrite,
   (* mark_debug = "true" *)   input logic LSUBusAck,
+   (* mark_debug = "true" *)   input logic LSUBusInit,
   (* mark_debug = "true" *)   input logic [`XLEN-1:0] LSUBusHRDATA,
   (* mark_debug = "true" *)   output logic [`XLEN-1:0] LSUBusHWDATA,
   (* mark_debug = "true" *)   output logic [2:0] LSUBusSize, 
+   (* mark_debug = "true" *)   output logic [2:0] LSUBurstType,
+   (* mark_debug = "true" *)   output logic [1:0] LSUTransType,
+   (* mark_debug = "true" *)   output logic LSUTransComplete,
            // page table walker
   input logic [`XLEN-1:0]  SATP_REGW, // from csr
   input logic              STATUS_MXR, STATUS_SUM, STATUS_MPRV,
@ -211,7 +215,7 @@ module lsu (
            
    busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp(
      .clk, .reset,
-      .LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize,
+      .LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
      .WordCount, .LSUBusWriteCrit,
      .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine,
      .DCacheWriteLine, .DCacheBusAck, .DCacheBusWriteData, .LSUPAdrM,
--- a/pipelined/src/ppa/ppa.sv
+++ b/pipelined/src/ppa/ppa.sv
@ -525,7 +525,7 @@ module ppa_decoder #(parameter WIDTH = 8) (
  end
 endmodule

-module ppa_mux2_1 #(parameter WIDTH = 1) (
+module ppa_mux2d_1 #(parameter WIDTH = 1) (
  input  logic [WIDTH-1:0] d0, d1, 
  input  logic             s, 
  output logic [WIDTH-1:0] y);
@ -533,7 +533,7 @@ module ppa_mux2_1 #(parameter WIDTH = 1) (
  assign y = s ? d1 : d0; 
 endmodule

-module ppa_mux4_1 #(parameter WIDTH = 1) (
+module ppa_mux4d_1 #(parameter WIDTH = 1) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3,
  input  logic [1:0]       s, 
  output logic [WIDTH-1:0] y);
@ -541,7 +541,7 @@ module ppa_mux4_1 #(parameter WIDTH = 1) (
  assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0); 
 endmodule

-module ppa_mux8_1 #(parameter WIDTH = 1) (
+module ppa_mux8d_1 #(parameter WIDTH = 1) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
  input  logic [2:0]       s, 
  output logic [WIDTH-1:0] y);
--- a/pipelined/src/uncore/gpio.sv
+++ b/pipelined/src/uncore/gpio.sv
@ -48,7 +48,7 @@ module gpio (

  logic [31:0] input0d, input1d, input2d, input3d;
  logic [31:0] input_val, input_en, output_en, output_val;
-  logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip; 
+  logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor; 

  logic initTrans, memwrite;
  logic [7:0] entry, entryd;
@ -91,6 +91,7 @@ module gpio (
      high_ip <= #1 0;
      low_ie <= #1 0;
      low_ip <= #1 0;
+      out_xor <= #1 0;
    end else begin
      // writes
      if (memwrite)
@ -104,7 +105,7 @@ module gpio (
          8'h20: fall_ie <= #1 Din;
          8'h28: high_ie <= #1 Din;
          8'h30: low_ie  <= #1 Din;
-          8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR
+          8'h40: out_xor <= #1 Din; 
        endcase
        /* verilator lint_on CASEINCOMPLETE */
      // reads
@ -121,7 +122,7 @@ module gpio (
        8'h2C: Dout <= #1 high_ip;
        8'h30: Dout <= #1 low_ie;
        8'h34: Dout <= #1 low_ip;
-        8'h40: Dout <= #1 0; // OUT_XOR reads as 0
+        8'h40: Dout <= #1 out_xor;
        default: Dout <= #1 0;
      endcase
      // interrupts
@ -152,7 +153,7 @@ module gpio (
  flop #(32) sync2(HCLK,input1d,input2d);
  flop #(32) sync3(HCLK,input2d,input3d);
  assign input_val = input3d;
-  assign GPIOPinsOut = output_val;
+  assign GPIOPinsOut = output_val ^ out_xor;
  assign GPIOPinsEn = output_en;

  assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)};
--- a/pipelined/src/uncore/ram.sv
+++ b/pipelined/src/uncore/ram.sv
@ -43,77 +43,37 @@ module ram #(parameter BASE=0, RANGE = 65535) (
  output logic             HRESPRam, HREADYRam
 );

-  // Desired changes.
-  // 1. find a way to merge read and write address into 1 port.
-  // 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.)
-  // 3. implement burst.
-  // 4. remove the configurable latency.
+  localparam ADDR_WIDTH = $clog2(RANGE/8);
+  localparam OFFSET = $clog2(`XLEN/8);   

  logic [`XLEN/8-1:0] 		  ByteMask;
  logic [31:0]        HADDRD, RamAddr;
-  //logic				  prevHREADYRam, risingHREADYRam;
  logic				  initTrans;
  logic				  memwrite, memwriteD, memread;
  logic         nextHREADYRam;
-  //logic [3:0] 		  busycount;
-  
-  swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));

+  // a new AHB transaction starts when HTRANS requests a transaction,
+  // the peripheral is selected, and the previous transaction is completing
  assign initTrans = HREADY & HSELRam & (HTRANS[1]); 
-  assign memwrite = initTrans & HWRITE;  // *** why is initTrans needed?  See CLINT interface
+  assign memwrite = initTrans & HWRITE;  
  assign memread = initTrans & ~HWRITE;
 
  flopenr #(1) memwritereg(HCLK, ~HRESETn, HREADY, memwrite, memwriteD); 
  flopenr #(32)   haddrreg(HCLK, ~HRESETn, HREADY, HADDR, HADDRD);

-/*  // busy FSM to extend READY signal
-  always @(posedge HCLK, negedge HRESETn) 
-    if (~HRESETn) begin
-      busycount <= 0;
-      HREADYRam <= #1 0;
-    end else begin
-      if (initTrans) begin
-        busycount <= 0;
-        HREADYRam <= #1 0;
-      end else if (~HREADYRam) begin
-        if (busycount == 0) begin // Ram latency, for testing purposes.  *** test with different values such as 2
-          HREADYRam <= #1 1;
-        end else begin
-          busycount <= busycount + 1;
-        end
-      end
-    end */
-
-
  // Stall on a read after a write because the RAM can't take both adddresses on the same cycle
  assign nextHREADYRam = ~(memwriteD & memread);
-// assign nextHREADYRam = ~(memwriteD & ~memwrite);
  flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam);
-//  assign HREADYRam = ~(memwriteD & ~memwrite);
  assign HRESPRam = 0; // OK

-  localparam ADDR_WIDTH = $clog2(RANGE/8);
-  localparam OFFSET = $clog2(`XLEN/8);
-  
-/*  // Rising HREADY edge detector
-  //   Indicates when ram is finishing up
-  //   Needed because HREADY may go high for other reasons,
-  //   and we only want to write data when finishing up.
-  flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
-  assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/
-
-/*
- bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
-  memory(.clk(HCLK), .reA(1'b1),
-		 .addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
-		 .weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
-		 .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */
-
-    
-
  // On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address
  mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr);

+  // Byte mask for subword writes
+  // *** the CLINT and other peripherals duplicate this hardware
+  // *** it should be centralized and sent over HWSTRB
+  swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
+
  // single-ported RAM
  bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH)
    memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA));  
--- a/pipelined/src/wally/wallypipelinedcore.sv
+++ b/pipelined/src/wally/wallypipelinedcore.sv
@ -93,10 +93,12 @@ module wallypipelinedcore (
  logic             FWriteIntE;
  logic [`XLEN-1:0]         FWriteDataE;
  logic [`XLEN-1:0]         FIntResM;  
+  logic [`XLEN-1:0]         FCvtIntResW;  
  logic             FDivBusyE;
  logic             IllegalFPUInstrD, IllegalFPUInstrE;
  logic             FRegWriteM;
  logic             FPUStallD;
+  logic [1:0]       FResSelW;
  logic [4:0]             SetFflagsM;

  // memory management unit signals
@ -134,13 +136,16 @@ module wallypipelinedcore (
  logic [`PA_BITS-1:0]         IFUBusAdr;
  logic [`XLEN-1:0]         IFUBusHRDATA;
  logic             IFUBusRead;
-  logic             IFUBusAck;
+  logic             IFUBusAck, IFUBusInit;
+  logic [2:0]       IFUBurstType;
+  logic [1:0]       IFUTransType;
+  logic             IFUTransComplete;
  
  // AHB LSU interface
  logic [`PA_BITS-1:0]         LSUBusAdr;
  logic             LSUBusRead;
  logic             LSUBusWrite;
-  logic             LSUBusAck;
+  logic             LSUBusAck, LSUBusInit;
  logic [`XLEN-1:0]         LSUBusHRDATA;
  logic [`XLEN-1:0]         LSUBusHWDATA;
  
@ -152,6 +157,9 @@ module wallypipelinedcore (
  logic [4:0]             InstrClassM;
  logic             InstrAccessFaultF;
  logic [2:0]             LSUBusSize;
+  logic [2:0]             LSUBurstType;
+  logic [1:0]             LSUTransType;
+  logic             LSUTransComplete;
  
  logic             DCacheMiss;
  logic             DCacheAccess;
@ -166,8 +174,8 @@ module wallypipelinedcore (
    .StallF, .StallD, .StallE, .StallM, 
    .FlushF, .FlushD, .FlushE, .FlushM, 
    // Fetch
-    .IFUBusHRDATA, .IFUBusAck, .PCF, .IFUBusAdr,
-    .IFUBusRead, .IFUStallF,
+    .IFUBusHRDATA, .IFUBusAck, .IFUBusInit, .PCF, .IFUBusAdr,
+    .IFUBusRead, .IFUStallF, .IFUBurstType, .IFUTransType, .IFUTransComplete,
    .ICacheAccess, .ICacheMiss,

    // Execute
@ -224,6 +232,8 @@ module wallypipelinedcore (
     .CSRReadValW, .ReadDataM, .MDUResultW,
     .RdW, .ReadDataW,
     .InstrValidM, 
+     .FCvtIntResW,
+     .FResSelW,

     // hazards
     .StallD, .StallE, .StallM, .StallW,
@ -247,8 +257,8 @@ module wallypipelinedcore (
  .IEUAdrE, .IEUAdrM, .WriteDataE,
  .ReadDataM, .FlushDCacheM,
  // connected to ahb (all stay the same)
-  .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck,
-  .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize,
+  .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
+  .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,

    // connect to csr or privilege and stay the same.
    .PrivilegeModeW, .BigEndianM,          // connects to csr
@ -279,13 +289,22 @@ module wallypipelinedcore (
  ahblite ebu(// IFU connections
     .clk, .reset,
     .UnsignedLoadM(1'b0), .AtomicMaskedM(2'b00),
-     .IFUBusAdr,
-     .IFUBusRead, .IFUBusHRDATA, .IFUBusAck,
+     .IFUBusAdr, .IFUBusRead, 
+     .IFUBusHRDATA, 
+     .IFUBurstType, 
+     .IFUTransType, 
+     .IFUTransComplete,
+     .IFUBusAck, 
+     .IFUBusInit, 
     // Signals from Data Cache
     .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusHWDATA,
     .LSUBusHRDATA,
     .LSUBusSize,
+     .LSUBurstType,
+     .LSUTransType,
+     .LSUTransComplete,
     .LSUBusAck,
+     .LSUBusInit,
 
     .HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
     .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST,
@ -375,6 +394,8 @@ module wallypipelinedcore (
         .FWriteIntE, // integer register write enable
         .FWriteDataE, // Data to be written to memory
         .FIntResM, // data to be written to integer register
+         .FCvtIntResW, // fp -> int conversion result to be stored in int register
+         .FResSelW,   // fpu result selection
         .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
         .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
         .SetFflagsM        // FPU flags (to privileged unit)
--- a/pipelined/srt/testvectors
+++ b/pipelined/srt/testvectors
@ -1,289 +0,0 @@
-4000000000000000_4000000000000000_3ff0000000000000
-c018000000000000_4000000000000000_c008000000000000
-4024000000000000_4000000000000000_4014000000000000
-c032000000000000_4000000000000000_c022000000000000
-4041000000000000_4000000000000000_4031000000000000
-c05c000000000000_4000000000000000_c04c000000000000
-406e000000000000_4000000000000000_405e000000000000
-c07ffff583a53b8e_4000000000000000_c06ffff583a53b8e
-408199999999999a_4000000000000000_407199999999999a
-c093333333333333_4000000000000000_c083333333333333
-40a028f5c28f5c29_4000000000000000_409028f5c28f5c29
-c0b004189374bc6a_4000000000000000_c0a004189374bc6a
-40c00068db8bac71_4000000000000000_40b00068db8bac71
-c0dd1745d1745d17_4000000000000000_c0cd1745d1745d17
-40e5555555555555_4000000000000000_40d5555555555555
-c0f999999999999a_4000000000000000_c0e999999999999a
-410c71c71c71c71c_4000000000000000_40fc71c71c71c71c
-4000000000000000_c018000000000000_bfe5555555555555
-c018000000000000_c018000000000000_3ff0000000000000
-4024000000000000_c018000000000000_c00aaaaaaaaaaaab
-c032000000000000_c018000000000000_4018000000000000
-4041000000000000_c018000000000000_c026aaaaaaaaaaab
-c05c000000000000_c018000000000000_4032aaaaaaaaaaab
-406e000000000000_c018000000000000_c044000000000000
-c07ffff583a53b8e_c018000000000000_4055554e57c37d09
-408199999999999a_c018000000000000_c067777777777778
-c093333333333333_c018000000000000_4079999999999999
-40a028f5c28f5c29_c018000000000000_c0858bf258bf258c
-c0b004189374bc6a_c018000000000000_40955acb6f46508d
-40c00068db8bac71_c018000000000000_c0a555e124ba3b41
-c0dd1745d1745d17_c018000000000000_40b364d9364d9365
-40e5555555555555_c018000000000000_c0cc71c71c71c71c
-c0f999999999999a_c018000000000000_40d1111111111111
-410c71c71c71c71c_c018000000000000_c0e2f684bda12f68
-4000000000000000_4024000000000000_3fd999999999999a
-c018000000000000_4024000000000000_bfe3333333333333
-4024000000000000_4024000000000000_3ff0000000000000
-c032000000000000_4024000000000000_c00ccccccccccccd
-4041000000000000_4024000000000000_401b333333333333
-c05c000000000000_4024000000000000_c026666666666666
-406e000000000000_4024000000000000_4038000000000000
-c07ffff583a53b8e_4024000000000000_c0499991361dc93e
-408199999999999a_4024000000000000_405c28f5c28f5c2a
-c093333333333333_4024000000000000_c06eb851eb851eb8
-40a028f5c28f5c29_4024000000000000_4079db22d0e56042
-c0b004189374bc6a_4024000000000000_c089a027525460aa
-40c00068db8bac71_4024000000000000_40999a415f45e0b5
-c0dd1745d1745d17_4024000000000000_c0a745d1745d1746
-40e5555555555555_4024000000000000_40b1111111111111
-c0f999999999999a_4024000000000000_c0c47ae147ae147b
-410c71c71c71c71c_4024000000000000_40d6c16c16c16c16
-4000000000000000_c032000000000000_bfcc71c71c71c71c
-c018000000000000_c032000000000000_3fd5555555555555
-4024000000000000_c032000000000000_bfe1c71c71c71c72
-c032000000000000_c032000000000000_3ff0000000000000
-4041000000000000_c032000000000000_c00e38e38e38e38e
-c05c000000000000_c032000000000000_4018e38e38e38e39
-406e000000000000_c032000000000000_c02aaaaaaaaaaaab
-c07ffff583a53b8e_c032000000000000_403c71bdca59fc0c
-408199999999999a_c032000000000000_c04f49f49f49f4a0
-c093333333333333_c032000000000000_4051111111111111
-40a028f5c28f5c29_c032000000000000_c06cba9876543210
-c0b004189374bc6a_c032000000000000_407c790f3f086b67
-40c00068db8bac71_c032000000000000_c08c7281864da457
-c0dd1745d1745d17_c032000000000000_4099dbcc48676f31
-40e5555555555555_c032000000000000_c0a2f684bda12f68
-c0f999999999999a_c032000000000000_40b6c16c16c16c17
-410c71c71c71c71c_c032000000000000_c0c948b0fcd6e9e0
-4000000000000000_4041000000000000_3fbe1e1e1e1e1e1e
-c018000000000000_4041000000000000_bfc6969696969697
-4024000000000000_4041000000000000_3fd2d2d2d2d2d2d3
-c032000000000000_4041000000000000_bfe0f0f0f0f0f0f1
-4041000000000000_4041000000000000_3ff0000000000000
-c05c000000000000_4041000000000000_c00a5a5a5a5a5a5a
-406e000000000000_4041000000000000_401c3c3c3c3c3c3c
-c07ffff583a53b8e_4041000000000000_c02e1e143faa9268
-408199999999999a_4041000000000000_4030909090909091
-c093333333333333_4041000000000000_c042121212121212
-40a028f5c28f5c29_4041000000000000_405e6b3804d19e6b
-c0b004189374bc6a_4041000000000000_c06e25d3e863448b
-40c00068db8bac71_4041000000000000_407e1ee37f25085c
-c0dd1745d1745d17_4041000000000000_c08b6132a7041b61
-40e5555555555555_4041000000000000_4094141414141414
-c0f999999999999a_4041000000000000_c0a8181818181818
-410c71c71c71c71c_4041000000000000_40bac5701ac5701a
-4000000000000000_c05c000000000000_bfa2492492492492
-c018000000000000_c05c000000000000_3fbb6db6db6db6db
-4024000000000000_c05c000000000000_bfc6db6db6db6db7
-c032000000000000_c05c000000000000_3fd4924924924925
-4041000000000000_c05c000000000000_bfe36db6db6db6db
-c05c000000000000_c05c000000000000_3ff0000000000000
-406e000000000000_c05c000000000000_c001249249249249
-c07ffff583a53b8e_c05c000000000000_4012491e945e6b2d
-408199999999999a_c05c000000000000_c0241d41d41d41d5
-c093333333333333_c05c000000000000_4035f15f15f15f16
-40a028f5c28f5c29_c05c000000000000_c04277f44c118de6
-c0b004189374bc6a_c05c000000000000_40524dd2f1a9fbe7
-40c00068db8bac71_c05c000000000000_c062499c689fa081
-c0dd1745d1745d17_c05c000000000000_40709f959c427e56
-40e5555555555555_c05c000000000000_c088618618618618
-c0f999999999999a_c05c000000000000_409d41d41d41d41e
-410c71c71c71c71c_c05c000000000000_c0a0410410410410
-4000000000000000_406e000000000000_3f91111111111111
-c018000000000000_406e000000000000_bfa999999999999a
-4024000000000000_406e000000000000_3fb5555555555555
-c032000000000000_406e000000000000_bfc3333333333333
-4041000000000000_406e000000000000_3fd2222222222222
-c05c000000000000_406e000000000000_bfedddddddddddde
-406e000000000000_406e000000000000_3ff0000000000000
-c07ffff583a53b8e_406e000000000000_c001110b796930d4
-408199999999999a_406e000000000000_4012c5f92c5f92c6
-c093333333333333_406e000000000000_c0247ae147ae147b
-40a028f5c28f5c29_406e000000000000_40313cc1e098ead6
-c0b004189374bc6a_406e000000000000_c041156f8c384071
-40c00068db8bac71_406e000000000000_40511180ea2e95ce
-c0dd1745d1745d17_406e000000000000_c06f07c1f07c1f07
-40e5555555555555_406e000000000000_4076c16c16c16c16
-c0f999999999999a_406e000000000000_c08b4e81b4e81b4f
-410c71c71c71c71c_406e000000000000_409e573ac901e573
-4000000000000000_c07ffff583a53b8e_bf8000053e2f1a08
-c018000000000000_c07ffff583a53b8e_3f980007dd46a70b
-4024000000000000_c07ffff583a53b8e_bfa400068dbae089
-c032000000000000_c07ffff583a53b8e_3fb20005e5f4fd48
-4041000000000000_c07ffff583a53b8e_bfc1000592120ba8
-c05c000000000000_c07ffff583a53b8e_3fdc00092cd26d8d
-406e000000000000_c07ffff583a53b8e_bfee0009d49850ce
-c07ffff583a53b8e_c07ffff583a53b8e_3ff0000000000000
-408199999999999a_c07ffff583a53b8e_c001999f5e009ca2
-c093333333333333_c07ffff583a53b8e_401333397dd21f3c
-40a028f5c28f5c29_c07ffff583a53b8e_c02028fb0e2a73e4
-c0b004189374bc6a_c07ffff583a53b8e_4030041dd2fb6fd0
-40c00068db8bac71_c07ffff583a53b8e_c040006e19dd229c
-c0dd1745d1745d17_c07ffff583a53b8e_405d174f59ca00c8
-40e5555555555555_c07ffff583a53b8e_c065555c52e9780a
-c0f999999999999a_c07ffff583a53b8e_407999a1fd1829a6
-410c71c71c71c71c_c07ffff583a53b8e_c08c71d06e8ca00d
-4000000000000000_408199999999999a_3f7d1745d1745d17
-c018000000000000_408199999999999a_bf85d1745d1745d1
-4024000000000000_408199999999999a_3f922e8ba2e8ba2e
-c032000000000000_408199999999999a_bfa05d1745d1745d
-4041000000000000_408199999999999a_3fbee8ba2e8ba2e8
-c05c000000000000_408199999999999a_bfc9745d1745d174
-406e000000000000_408199999999999a_3fdb45d1745d1745
-c07ffff583a53b8e_408199999999999a_bfed173c4921d90c
-408199999999999a_408199999999999a_3ff0000000000000
-c093333333333333_408199999999999a_c001745d1745d174
-40a028f5c28f5c29_408199999999999a_401d61bed61bed61
-c0b004189374bc6a_408199999999999a_c02d1eb851eb851d
-40c00068db8bac71_408199999999999a_403d180477e6ade4
-c0dd1745d1745d17_408199999999999a_c04a723f789854a0
-40e5555555555555_408199999999999a_405364d9364d9364
-c0f999999999999a_408199999999999a_c06745d1745d1746
-410c71c71c71c71c_408199999999999a_4079dbcc48676f30
-4000000000000000_c093333333333333_bf6aaaaaaaaaaaab
-c018000000000000_c093333333333333_3f74000000000000
-4024000000000000_c093333333333333_bf80aaaaaaaaaaab
-c032000000000000_c093333333333333_3f9e000000000000
-4041000000000000_c093333333333333_bfac555555555556
-c05c000000000000_c093333333333333_3fb7555555555556
-406e000000000000_c093333333333333_bfc9000000000000
-c07ffff583a53b8e_c093333333333333_3fdaaaa1edb45c4c
-408199999999999a_c093333333333333_bfed555555555556
-c093333333333333_c093333333333333_3ff0000000000000
-40a028f5c28f5c29_c093333333333333_c00aeeeeeeeeeeef
-c0b004189374bc6a_c093333333333333_401ab17e4b17e4b1
-40c00068db8bac71_c093333333333333_c02aab596de8ca12
-c0dd1745d1745d17_c093333333333333_40383e0f83e0f83e
-40e5555555555555_c093333333333333_c041c71c71c71c72
-c0f999999999999a_c093333333333333_4055555555555556
-410c71c71c71c71c_c093333333333333_c067b425ed097b42
-4000000000000000_40a028f5c28f5c29_3f5faee41e6a7498
-c018000000000000_40a028f5c28f5c29_bf67c32b16cfd772
-4024000000000000_40a028f5c28f5c29_3f73cd4e930288df
-c032000000000000_40a028f5c28f5c29_bf81d260511be196
-4041000000000000_40a028f5c28f5c29_3f90d4e930288df1
-c05c000000000000_40a028f5c28f5c29_bfabb9079a9d2605
-406e000000000000_40a028f5c28f5c29_3fbdb3f5dc83cd4f
-c07ffff583a53b8e_40a028f5c28f5c29_bfcfaed9bca398bf
-408199999999999a_40a028f5c28f5c29_3fd16cfd7720f354
-c093333333333333_40a028f5c28f5c29_bfe30288df0cac5b
-40a028f5c28f5c29_40a028f5c28f5c29_3ff0000000000000
-c0b004189374bc6a_40a028f5c28f5c29_c00fb70081c635bb
-40c00068db8bac71_40a028f5c28f5c29_401fafb3c1f3a182
-c0dd1745d1745d17_40a028f5c28f5c29_c02ccd899003afd0
-40e5555555555555_40a028f5c28f5c29_40351f42bef1a310
-c0f999999999999a_40a028f5c28f5c29_c04958b67ebb907a
-410c71c71c71c71c_40a028f5c28f5c29_405c29ae53ecd96a
-4000000000000000_c0b004189374bc6a_bf4ff7d0f16c2e0a
-c018000000000000_c0b004189374bc6a_3f57f9dcb5112287
-4024000000000000_c0b004189374bc6a_bf63fae296e39cc6
-c032000000000000_c0b004189374bc6a_3f71fb6587ccd9e5
-4041000000000000_c0b004189374bc6a_bf80fba700417875
-c05c000000000000_c0b004189374bc6a_3f9bf8d6d33ea848
-406e000000000000_c0b004189374bc6a_bfadf853e2556b29
-c07ffff583a53b8e_c0b004189374bc6a_3fbff7c677bfebb5
-408199999999999a_c0b004189374bc6a_bfc1951951951953
-c093333333333333_c0b004189374bc6a_3fd32e4a2a741b9f
-40a028f5c28f5c29_c0b004189374bc6a_bfe024d3c19930d9
-c0b004189374bc6a_c0b004189374bc6a_3ff0000000000000
-40c00068db8bac71_c0b004189374bc6a_c00ff8a272e15ca2
-c0dd1745d1745d17_c0b004189374bc6a_401d0fd53890e409
-40e5555555555555_c0b004189374bc6a_c0254fe0a0f2c95b
-c0f999999999999a_c0b004189374bc6a_4039930d8df024d5
-410c71c71c71c71c_c0b004189374bc6a_c04c6a80d6990c7a
-4000000000000000_40c00068db8bac71_3f3fff2e4e46e7a8
-c018000000000000_40c00068db8bac71_bf47ff62bab52dbe
-4024000000000000_40c00068db8bac71_3f53ff7cf0ec50c9
-c032000000000000_40c00068db8bac71_bf61ff8a0c07e24f
-4041000000000000_40c00068db8bac71_3f70ff909995ab11
-c05c000000000000_40c00068db8bac71_bf8bff48847e0ab3
-406e000000000000_40c00068db8bac71_3f9dff3b6962792e
-c07ffff583a53b8e_40c00068db8bac71_bfafff23d230d9a4
-408199999999999a_40c00068db8bac71_3fb1992644a6ff6a
-c093333333333333_40c00068db8bac71_bfc332b5622a8afe
-40a028f5c28f5c29_40c00068db8bac71_3fd0288bdd4a34fd
-c0b004189374bc6a_40c00068db8bac71_bfe003af9fc0ed8b
-40c00068db8bac71_40c00068db8bac71_3ff0000000000000
-c0dd1745d1745d17_40c00068db8bac71_c00d16872fe35e3c
-40e5555555555555_40c00068db8bac71_401554c989849a70
-c0f999999999999a_40c00068db8bac71_c02998f1d838b954
-410c71c71c71c71c_40c00068db8bac71_403c710cb75b7895
-4000000000000000_c0dd1745d1745d17_bf2199999999999a
-c018000000000000_c0dd1745d1745d17_3f3a666666666667
-4024000000000000_c0dd1745d1745d17_bf46000000000000
-c032000000000000_c0dd1745d1745d17_3f53cccccccccccd
-4041000000000000_c0dd1745d1745d17_bf62b33333333333
-c05c000000000000_c0dd1745d1745d17_3f7ecccccccccccd
-406e000000000000_c0dd1745d1745d17_bf80800000000000
-c07ffff583a53b8e_c0dd1745d1745d17_3f919993d5347a5b
-408199999999999a_c0dd1745d1745d17_bfa35c28f5c28f5d
-c093333333333333_c0dd1745d1745d17_3fb51eb851eb851f
-40a028f5c28f5c29_c0dd1745d1745d17_bfc1c6a7ef9db22d
-c0b004189374bc6a_c0dd1745d1745d17_3fd19e1b089a0275
-40c00068db8bac71_c0dd1745d1745d17_bfe19a0cf1800a7c
-c0dd1745d1745d17_c0dd1745d1745d17_3ff0000000000000
-40e5555555555555_c0dd1745d1745d17_c007777777777777
-c0f999999999999a_c0dd1745d1745d17_401c28f5c28f5c2a
-410c71c71c71c71c_c0dd1745d1745d17_c02f49f49f49f49f
-4000000000000000_40e5555555555555_3f18000000000000
-c018000000000000_40e5555555555555_bf22000000000000
-4024000000000000_40e5555555555555_3f3e000000000000
-c032000000000000_40e5555555555555_bf4b000000000000
-4041000000000000_40e5555555555555_3f59800000000000
-c05c000000000000_40e5555555555555_bf65000000000000
-406e000000000000_40e5555555555555_3f76800000000000
-c07ffff583a53b8e_40e5555555555555_bf87fff822bbecab
-408199999999999a_40e5555555555555_3f9a666666666667
-c093333333333333_40e5555555555555_bfaccccccccccccd
-40a028f5c28f5c29_40e5555555555555_3fb83d70a3d70a3e
-c0b004189374bc6a_40e5555555555555_bfc80624dd2f1a9f
-40c00068db8bac71_40e5555555555555_3fd8009d495182aa
-c0dd1745d1745d17_40e5555555555555_bfe5d1745d1745d2
-40e5555555555555_40e5555555555555_3ff0000000000000
-c0f999999999999a_40e5555555555555_c003333333333334
-410c71c71c71c71c_40e5555555555555_4015555555555555
-4000000000000000_c0f999999999999a_bf04000000000000
-c018000000000000_c0f999999999999a_3f1e000000000000
-4024000000000000_c0f999999999999a_bf29000000000000
-c032000000000000_c0f999999999999a_3f36800000000000
-4041000000000000_c0f999999999999a_bf45400000000000
-c05c000000000000_c0f999999999999a_3f51800000000000
-406e000000000000_c0f999999999999a_bf62c00000000000
-c07ffff583a53b8e_c0f999999999999a_3f73fff972474538
-408199999999999a_c0f999999999999a_bf86000000000000
-c093333333333333_c0f999999999999a_3f97ffffffffffff
-40a028f5c28f5c29_c0f999999999999a_bfa4333333333333
-c0b004189374bc6a_c0f999999999999a_3fb4051eb851eb84
-40c00068db8bac71_c0f999999999999a_bfc40083126e978d
-c0dd1745d1745d17_c0f999999999999a_3fd22e8ba2e8ba2e
-40e5555555555555_c0f999999999999a_bfeaaaaaaaaaaaaa
-c0f999999999999a_c0f999999999999a_3ff0000000000000
-410c71c71c71c71c_c0f999999999999a_c001c71c71c71c71
-4000000000000000_410c71c71c71c71c_3ef2000000000000
-c018000000000000_410c71c71c71c71c_bf0b000000000000
-4024000000000000_410c71c71c71c71c_3f16800000000000
-c032000000000000_410c71c71c71c71c_bf24400000000000
-4041000000000000_410c71c71c71c71c_3f33200000000000
-c05c000000000000_410c71c71c71c71c_bf4f800000000000
-406e000000000000_410c71c71c71c71c_3f50e00000000000
-c07ffff583a53b8e_410c71c71c71c71c_bf61fffa1a0cf180
-408199999999999a_410c71c71c71c71c_3f73ccccccccccce
-c093333333333333_410c71c71c71c71c_bf8599999999999a
-40a028f5c28f5c29_410c71c71c71c71c_3f922e147ae147ae
-c0b004189374bc6a_410c71c71c71c71c_bfa2049ba5e353f8
-40c00068db8bac71_410c71c71c71c71c_3fb20075f6fd21ff
-c0dd1745d1745d17_410c71c71c71c71c_bfc05d1745d1745d
-40e5555555555555_410c71c71c71c71c_3fd8000000000000
-c0f999999999999a_410c71c71c71c71c_bfecccccccccccce
-410c71c71c71c71c_410c71c71c71c71c_3ff0000000000000
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -10,120 +10,64 @@ module testbenchfp;
  parameter TEST="none";

  string      Tests[];        // list of tests to be run
-  string      FmaRneTests[];  // list of FMA round to nearest even tests to run
-  string      FmaRuTests[];   // list of FMA round up tests to run
-  string      FmaRdTests[];   // list of FMA round down tests to run
-  string      FmaRzTests[];   // list of FMA round twords zero
-  string      FmaRnmTests[];  // list of FMA round to nearest max magnitude
  logic [2:0] OpCtrl[];       // list of op controls
  logic [2:0] Unit[];         // list of units being tested
  logic WriteInt[];           // Is being written to integer register
  logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
  logic [1:0] Fmt[];          // list of formats for the other units
-  logic [1:0] FmaFmt[];       // list of formats for the FMA
  

  logic               clk=0;
  logic [31:0]        TestNum=0;    // index for the test
-  logic [31:0]        FmaTestNum=0;    // index for the test
  logic [31:0]        OpCtrlNum=0;  // index for OpCtrl
  logic [31:0]        errors=0;     // how many errors
  logic [31:0]        VectorNum=0;  // index for test vector
-  logic [31:0]        FmaVectorNum=0;  // index for test vector
  logic [31:0]        FrmNum=0;     // index for rounding mode
-  logic [`FLEN*4+7:0] TestVectors[46464:0];     // list of test vectors
-  logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; // list of fma rne test vectors
-  logic [`FLEN*4+7:0] FmaRuVectors[6133248:0];  // list of fma ru test vectors
-  logic [`FLEN*4+7:0] FmaRdVectors[6133248:0];  // list of fma rd test vectors
-  logic [`FLEN*4+7:0] FmaRzVectors[6133248:0];  // list of fma rz test vectors
-  logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; // list of fma rnm test vectors
+  logic [`FLEN*4+7:0] TestVectors[6133248:0];     // list of test vectors

-  logic [1:0]           FmaFmtVal, FmtVal;          // value of the current Fmt
+  logic [1:0]           FmtVal;          // value of the current Fmt
  logic [2:0]           UnitVal, OpCtrlVal, FrmVal; // value of the current Unit/OpCtrl/FrmVal
  logic                 WriteIntVal;                // value of the current WriteInt
  logic [`FLEN-1:0]     X, Y, Z;                    // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRneX, FmaRneY, FmaRneZ;  // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRzX, FmaRzY, FmaRzZ;     // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRuX, FmaRuY, FmaRuZ;     // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRdX, FmaRdY, FmaRdZ;     // inputs read from TestFloat
-  logic [`FLEN-1:0]     FmaRnmX, FmaRnmY, FmaRnmZ;  // inputs read from TestFloat
  logic [`XLEN-1:0]     SrcA;                       // integer input
  logic [`FLEN-1:0]	    Ans;                        // correct answer from TestFloat
-  logic [`FLEN-1:0]     FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat
  logic [`FLEN-1:0]	    Res;                                                // result from other units
-  logic [`FLEN-1:0]	    FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // results from FMA
  logic [4:0]	 	        AnsFlg;                                             // correct flags read from testfloat
-  logic [4:0]           FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
-  logic [4:0]	 	        ResFlg;                                                            // Result flags
-  logic [4:0]           FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
-  logic	[`FMTBITS-1:0]  ModFmt, FmaModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
-  logic [`FLEN-1:0]     FmaRes, DivRes, CmpRes, CvtRes;  // Results from each unit
-  logic [`XLEN-1:0]     CvtIntRes;  // Results from each unit
+  logic [4:0]	 	        ResFlg, Flg;                                                            // Result flags
+  logic	[`FMTBITS-1:0]  ModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
+  logic [`FLEN-1:0]     FpRes, FpCmpRes;  // Results from each unit
+  logic [`XLEN-1:0]     IntRes, CmpRes;  // Results from each unit
  logic [4:0]           FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
-  logic                 ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN;   // is the outputed result NaN
-  logic                 AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN;   // is the correct answer NaN
-  logic                 NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
+  logic                 AnsNaN, ResNaN, NaNGood;
  logic                 XSgn, YSgn, ZSgn;                     // sign of the inputs
-  logic                 FmaRneXSgn, FmaRneYSgn, FmaRneZSgn;
-  logic                 FmaRzXSgn, FmaRzYSgn, FmaRzZSgn;
-  logic                 FmaRuXSgn, FmaRuYSgn, FmaRuZSgn;
-  logic                 FmaRdXSgn, FmaRdYSgn, FmaRdZSgn;
-  logic                 FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn;
  logic [`NE-1:0]       XExp, YExp, ZExp;                     // exponent of the inputs
-  logic [`NE-1:0]       FmaRneXExp, FmaRneYExp, FmaRneZExp;
-  logic [`NE-1:0]       FmaRzXExp, FmaRzYExp, FmaRzZExp;
-  logic [`NE-1:0]       FmaRuXExp, FmaRuYExp, FmaRuZExp;
-  logic [`NE-1:0]       FmaRdXExp, FmaRdYExp, FmaRdZExp;
-  logic [`NE-1:0]       FmaRnmXExp, FmaRnmYExp, FmaRnmZExp;
  logic [`NF:0]         XMan, YMan, ZMan;                     // mantissas of the inputs
-  logic [`NF:0]         FmaRneXMan, FmaRneYMan, FmaRneZMan;
-  logic [`NF:0]         FmaRzXMan, FmaRzYMan, FmaRzZMan;
-  logic [`NF:0]         FmaRuXMan, FmaRuYMan, FmaRuZMan;
-  logic [`NF:0]         FmaRdXMan, FmaRdYMan, FmaRdZMan;
-  logic [`NF:0]         FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
  logic                 XNaN, YNaN, ZNaN;                     // is the input NaN
-  logic                 FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
-  logic                 FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
-  logic                 FmaRuXNaN, FmaRuYNaN, FmaRuZNaN;
-  logic                 FmaRdXNaN, FmaRdYNaN, FmaRdZNaN;
-  logic                 FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN;
  logic                 XSNaN, YSNaN, ZSNaN;                  // is the input a signaling NaN
-  logic                 FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN;
-  logic                 FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN;
-  logic                 FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
-  logic                 FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
-  logic                 FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
  logic                 XDenorm, ZDenorm;            // is the input denormalized
-  logic                 FmaRneXDenorm, FmaRneZDenorm;
-  logic                 FmaRzXDenorm, FmaRzZDenorm;
-  logic                 FmaRuXDenorm, FmaRuZDenorm;
-  logic                 FmaRdXDenorm, FmaRdZDenorm;
-  logic                 FmaRnmXDenorm, FmaRnmZDenorm;
  logic                 XInf, YInf, ZInf;                   // is the input infinity
-  logic                 FmaRneXInf, FmaRneYInf, FmaRneZInf;
-  logic                 FmaRzXInf, FmaRzYInf, FmaRzZInf;
-  logic                 FmaRuXInf, FmaRuYInf, FmaRuZInf;
-  logic                 FmaRdXInf, FmaRdYInf, FmaRdZInf;
-  logic                 FmaRnmXInf, FmaRnmYInf, FmaRnmZInf;
  logic                 XZero, YZero, ZZero;                // is the input zero
-  logic                 FmaRneXZero, FmaRneYZero, FmaRneZZero;
-  logic                 FmaRzXZero, FmaRzYZero, FmaRzZZero;
-  logic                 FmaRuXZero, FmaRuYZero, FmaRuZZero;
-  logic                 FmaRdXZero, FmaRdYZero, FmaRdZZero;
-  logic                 FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
  logic                 XExpMax, YExpMax, ZExpMax;         // is the input's exponent all ones  
+  logic  [`LGLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
+  logic        IntZeroE;
+  logic CvtResSgnE;
+  logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
+  logic [`NE:0]           CvtCalcExpE;    // the calculated exponent
+	logic [`LOGLGLEN-1:0] CvtShiftAmtE;  // how much to shift by
+  logic CvtResDenormUfE;
+  

  // in-between FMA signals
  logic                 Mult;
-  logic [`NE+1:0]	      ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp;
-  logic 				        AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky;
-  logic 					      KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd; 
-  logic [$clog2(3*`NF+7)-1:0]	NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt;
-  logic [3*`NF+5:0]	    SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum;       
-  logic 			          InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ;
-  logic 			          NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum;
-  logic 			          ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff;
-  logic 			          PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn;
+  logic [`NE+1:0]	      ProdExpE;
+  logic 				        AddendStickyE;
+  logic 					      KillProdE; 
+  logic [$clog2(3*`NF+7)-1:0]	FmaNormCntE;
+  logic [3*`NF+5:0]	    SumE;       
+  logic 			          InvZE;
+  logic 			          NegSumE;
+  logic 			          ZSgnEffE;
+  logic 			          PSgnE;


  ///////////////////////////////////////////////////////////////////////////////////////////////
@ -282,15 +226,13 @@ module testbenchfp;
      //     end
      // end
      if (TEST === "fma"   | TEST === "all") begin  // if fused-multiply-add is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f128_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f128_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f128_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
-        // add the format for the Fma
-        FmaFmt = {FmaFmt, 2'b11};
+        Tests = {Tests, f128fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b11};
+        end
      end
    end
    if (`D_SUPPORTED) begin // if double precision is supported
@ -411,14 +353,13 @@ module testbenchfp;
      //   end
      // end
      if (TEST === "fma"   | TEST === "all") begin // if the fused multiply add is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f64_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f64_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f64_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
-        FmaFmt = {FmaFmt, 2'b01};
+        Tests = {Tests, f64fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b01};
+        end
      end
    end
    if (`F_SUPPORTED) begin // if single precision being supported
@ -523,14 +464,13 @@ module testbenchfp;
      //   end
      // end
      if (TEST === "fma"   | TEST === "all")  begin // if fma is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f32_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f32_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f32_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
-        FmaFmt = {FmaFmt, 2'b00};
+        Tests = {Tests, f32fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b00};
+        end
      end
    end
    if (`ZFH_SUPPORTED) begin // if half precision supported
@ -617,19 +557,18 @@ module testbenchfp;
      //   end
      // end
      if (TEST === "fma"   | TEST === "all") begin // if fma is being tested
-        // add each rounding mode to it's own list of tests
-        //    - fma tests are very long, so run all rounding modes in parallel
-        FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
-        FmaRzTests  = {FmaRzTests,  "f16_mulAdd_rz.tv"};
-        FmaRuTests  = {FmaRuTests,  "f16_mulAdd_ru.tv"};
-        FmaRdTests  = {FmaRdTests,  "f16_mulAdd_rd.tv"};
-        FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
-        FmaFmt = {FmaFmt, 2'b10};
+        Tests = {Tests, f16fma};
+        OpCtrl = {OpCtrl, `FMA_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `FMAUNIT};
+          Fmt = {Fmt, 2'b10};
+        end
      end
    end

    // check if nothing is being tested
-    if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
+    if (Tests.size() == 0) begin
      $display("TEST %s not supported in this configuration", TEST);
      $stop;
    end
@ -648,26 +587,17 @@ module testbenchfp;
  // Read the first test
  initial begin
    $display("\n\nRunning %s vectors", Tests[TestNum]);
-    $display("Running FMA precision %d", FmaTestNum);
    $readmemh({`PATH, Tests[TestNum]}, TestVectors);
-    $readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
-    $readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
-    $readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
-    $readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
-    $readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
    // set the test index to 0
    TestNum = 0;
-    FmaTestNum = 0;
  end

  // set the signals for all tests
-  always_comb FmaFmtVal = FmaFmt[FmaTestNum];
  always_comb UnitVal = Unit[TestNum];
  always_comb FmtVal = Fmt[TestNum];
  always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
  always_comb WriteIntVal = WriteInt[OpCtrlNum];
  always_comb FrmVal = Frm[FrmNum];
-  assign Mult = OpCtrlVal === 3'b100;

  // modify the format signal if only 2 precisions supported
  //    - 1 for the larger precision
@ -675,61 +605,9 @@ module testbenchfp;
  always_comb begin
    if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
    else ModFmt = FmtVal;
-    if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
-    else FmaModFmt = FmaFmtVal;
  end

  // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
-  readfmavectors readfmarnevectors (.clk, .TestVector(FmaRneVectors[FmaVectorNum]), .Ans(FmaRneAns), .AnsFlg(FmaRneAnsFlg), 
-                                    .XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
-                                    .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), 
-                                    .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), 
-                                    .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
-                                    .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), 
-                                    .XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm), 
-                                    .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
-                                    .XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
-                                    .X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
-  readfmavectors readfmarzvectors (.clk, .TestVector(FmaRzVectors[FmaVectorNum]), .Ans(FmaRzAns), .AnsFlg(FmaRzAnsFlg), 
-                                    .XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
-                                    .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), 
-                                    .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), 
-                                    .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
-                                    .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), 
-                                    .XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm), 
-                                    .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
-                                    .XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
-  readfmavectors readfmaruvectors (.clk, .TestVector(FmaRuVectors[FmaVectorNum]), .Ans(FmaRuAns), .AnsFlg(FmaRuAnsFlg), 
-                                    .XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
-                                    .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), 
-                                    .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), 
-                                    .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
-                                    .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), 
-                                    .XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm), 
-                                    .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
-                                    .XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
-  readfmavectors readfmardvectors (.clk, .TestVector(FmaRdVectors[FmaVectorNum]), .Ans(FmaRdAns), .AnsFlg(FmaRdAnsFlg), 
-                                    .XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
-                                    .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), 
-                                    .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
-                                    .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
-                                    .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), 
-                                    .XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm), 
-                                    .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
-                                    .XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
-  readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[FmaVectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg), 
-                                    .XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
-                                    .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), 
-                                    .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
-                                    .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
-                                    .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), 
-                                    .XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm), 
-                                    .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
-                                    .XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
-                                    .X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                    .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
                                    .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
@ -754,124 +632,30 @@ module testbenchfp;
  ///////////////////////////////////////////////////////////////////////////////////////////////

  // instantiate devices under test
-  //    - one fma for each precison
-  //    - all the units for the other tests (including fma for add/sub/mul)
-  fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn), 
-              .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), 
-              .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
-              .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ), 
-              .NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
-              .ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd)); 
-  fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn), 
-              .ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
-              .XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan), 
-              .XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN), 
-              .XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero), 
-              .XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf), 
-              .XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN), 
-              .KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp), 
-              .SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff), 
-              .PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE), 
-              .FMAFlgM(FmaRneResFlg), .FMAResM(FmaRneRes), .Mult(1'b0));
-  fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), 
-              .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), 
-              .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
-              .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ), 
-              .NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
-              .ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd)); 
-  fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn), 
-              .ZExpM(FmaRzZExp),  .ZDenormM(FmaRzZDenorm),
-              .XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan), 
-              .XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN), 
-              .XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero), 
-              .XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf), 
-              .XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN), 
-              .KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp), 
-              .SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff), 
-              .PSgnM(FmaRzPSgn), .FmtM(FmaModFmt), .FrmM(`RZ), 
-              .FMAFlgM(FmaRzResFlg), .FMAResM(FmaRzRes), .Mult(1'b0));
-  fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), 
-              .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), 
-              .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
-              .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ), 
-              .NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
-              .ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd)); 
-  fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn), 
-              .ZExpM(FmaRuZExp),  .ZDenormM(FmaRuZDenorm),
-              .XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan), 
-              .XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN), 
-              .XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero), 
-              .XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf), 
-              .XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN), 
-              .KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp), 
-              .SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff), 
-              .PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU), 
-              .FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
-  fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), 
-              .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), 
-              .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
-              .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ), 
-              .NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
-              .ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd)); 
-  fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn), 
-              .ZExpM(FmaRdZExp),  .ZDenormM(FmaRdZDenorm),
-              .XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan), 
-              .XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN), 
-              .XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero), 
-              .XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf), 
-              .XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN), 
-              .KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp), 
-              .SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff), 
-              .PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD), 
-              .FMAFlgM(FmaRdResFlg), .FMAResM(FmaRdRes), .Mult(1'b0));
-  fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), 
-              .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), 
-              .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
-              .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
-              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ), 
-              .NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
-              .ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd)); 
-  fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn), 
-              .ZExpM(FmaRnmZExp),  .ZDenormM(FmaRnmZDenorm),
-              .XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan), 
-              .XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN), 
-              .XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero), 
-              .XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf), 
-              .XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN), 
-              .KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp), 
-              .SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt), .ZSgnEffM(FmaRnmZSgnEff), 
-              .PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM), 
-              .FMAFlgM(FmaRnmResFlg), .FMAResM(FmaRnmRes), .Mult(1'b0));  
-  fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), 
+  fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), 
              .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), 
              .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
              .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
-              .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
+              .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
              .ProdExpE, .AddendStickyE, .KillProdE); 
-  fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn), 
-              .ZExpM(ZExp),  .ZDenormM(ZDenorm),
-              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), 
-              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), 
-              .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), 
-              .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), 
-              .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), 
+              
+  postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
+              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
+              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
+              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
+              .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
+              .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
+              .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
              .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), 
-              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
-              .FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
-  // fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf), 
-  //             .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
+              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
+              .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
  
 fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
-            .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
-            .XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), 
-            .CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
+            .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
+            .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
  fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
-              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), 
-              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
+              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
+              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
  // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), 
  //                 .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
  //                 .CvtRes, .CvtFlgE);
@ -900,60 +684,6 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
 ///////////////////////////////////////////////////////////////////////////////////////////////

  //Check if the correct answer and result is a NaN
-  always_comb begin
-    case (FmaFmtVal)
-        4'b11: begin // quad             
-          FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]);
-        end
-        4'b01: begin // double                 
-          FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]);
-        end
-        4'b00: begin // single
-          FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]);
-        end
-        4'b10: begin // half
-          FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]);
-          FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]);
-          FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]);
-          FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]);
-          FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]);
-          FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]);
-          FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]);
-          FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]);
-          FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]);
-          FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]);
-        end
-    endcase
-  end
-
-
  always_comb begin
    if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
      // an integer output can't be a NaN
@ -1004,20 +734,20 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
 always_comb begin
    // select the result to check
    case (UnitVal)
-      `FMAUNIT: Res = FmaRes;
-      `DIVUNIT: Res = DivRes;
+      `FMAUNIT: Res = FpRes;
+      `DIVUNIT: Res = FpRes;
      `CMPUNIT: Res = CmpRes;
-      `CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
-      `CVTFPUNIT: Res = CvtRes;
+      `CVTINTUNIT: if(WriteIntVal) Res = IntRes; else Res = FpRes;
+      `CVTFPUNIT: Res = FpRes;
    endcase

    // select the flag to check
    case (UnitVal)
-      `FMAUNIT: ResFlg = FmaFlg;
-      `DIVUNIT: ResFlg = DivFlg;
+      `FMAUNIT: ResFlg = Flg;
+      `DIVUNIT: ResFlg = Flg;
      `CMPUNIT: ResFlg = CmpFlg;
-      `CVTINTUNIT: ResFlg = CvtFlg;
-      `CVTFPUNIT: ResFlg = CvtFlg;
+      `CVTINTUNIT: ResFlg = Flg;
+      `CVTFPUNIT: ResFlg = Flg;
    endcase
 end
  // check results on falling edge of clk
@ -1027,117 +757,6 @@ end
    // check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
    //    - the sign of the NaN does not matter for the opperations being tested
    //    - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter
-    case (FmaFmtVal)
-      4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
-      4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
-      4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
-      4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) | 
-                            (FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) | 
-                            (FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
-      4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
-      4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
-      4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) | 
-                            (FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) | 
-                            (FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
-      4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
-      4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
-      4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) | 
-                            (FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) | 
-                            (FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
-      4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
-      4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
-      4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) | 
-                            (FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) | 
-                            (FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
-    endcase
-    case (FmaFmtVal)
-      4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
-      4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
-      4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
-      4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
-                            (FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) | 
-                            (FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) | 
-                            (FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]})));
-    endcase
    if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
      case (FmtVal)
        4'b11: NaNGood =  (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
@ -1221,77 +840,8 @@ end
      $stop;
    end

-    // check if the fma tests are correct
-    if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx)  & (FmaRneResFlg === FmaRneAnsFlg | FmaRneAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RNE");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlg, FmaRneAns, FmaRneAnsFlg);
-      $stop;
-    end
-    if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlg === FmaRzAnsFlg | FmaRzAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RZ");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlg, FmaRzAns, FmaRzAnsFlg);
-      $stop;
-    end
-    if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlg === FmaRuAnsFlg | FmaRuAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RU");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlg, FmaRuAns, FmaRuAnsFlg);
-      $stop;
-    end
-    if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlg === FmaRdAnsFlg | FmaRdAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RD");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlg, FmaRdAns, FmaRdAnsFlg);
-      $stop;
-    end
-    if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlg === FmaRnmAnsFlg | FmaRnmAnsFlg === 5'bx))) begin
-      errors += 1;
-      $display("There is an error in FMA - RNM");
-      $display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlg, FmaRnmAns, FmaRnmAnsFlg);
-      $stop;
-    end

    VectorNum += 1; // increment the vector
-    FmaVectorNum += 1; // increment the vector
-
-    // check to see if there more vectors in this test
-    // *** fix this so that fma and other run sepratly - re-add fma num
-    if ((FmaRneVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRzVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRuVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRdVectors[FmaVectorNum][0] === 1'bx & 
-        FmaRnmVectors[FmaVectorNum][0] === 1'bx & FmaRneTests[FmaTestNum] !== "" )) begin // if reached the end of file
-
-      // increment the test
-      FmaTestNum += 1;
-
-      // clear the vectors
-      for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
-      // read next files
-      $readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
-      $readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
-      $readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
-      $readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
-      $readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
-
-      // set the vector index back to 0
-      FmaVectorNum = 0;
-
-      // if no more Tests - finish
-      if(Tests[TestNum] === "" & 
-        FmaRneTests[FmaTestNum] === "" & 
-        FmaRzTests[FmaTestNum] === "" & 
-        FmaRuTests[FmaTestNum] === "" & 
-        FmaRdTests[FmaTestNum] === "" & 
-        FmaRnmTests[FmaTestNum] === "") begin
-        $display("\nAll Tests completed with %d errors\n", errors);
-        $stop;
-      end 
-
-      $display("Running FMA precision %d", FmaTestNum);
-    end

    if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file

@ -1299,14 +849,9 @@ end
      TestNum += 1;

      // clear the vectors
-      for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
+      for(int i=0; i<6133248; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
      // read next files
      $readmemh({`PATH, Tests[TestNum]}, TestVectors);
-      $readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
-      $readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
-      $readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
-      $readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
-      $readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);

      // set the vector index back to 0
      VectorNum = 0;
@ -1317,12 +862,7 @@ end
      else FrmNum = 0; 

      // if no more Tests - finish
-      if(Tests[TestNum] === "" & 
-        FmaRneTests[FmaTestNum] === "" & 
-        FmaRzTests[FmaTestNum] === "" & 
-        FmaRuTests[FmaTestNum] === "" & 
-        FmaRdTests[FmaTestNum] === "" & 
-        FmaRnmTests[FmaTestNum] === "") begin
+      if(Tests[TestNum] === "") begin
        $display("\nAll Tests completed with %d errors\n", errors);
        $stop;
      end 
@ -1335,89 +875,6 @@ endmodule



-
-
-
-
-
-
-
-
-
-module readfmavectors (
-  input logic                 clk,
-  input logic [`FMTBITS-1:0]  FmaModFmt,              // the modified format
-  input logic [1:0]           FmaFmt,                 // the format of the FMA inputs
-  input logic [`FLEN*4+7:0]   TestVector,             // the test vector
-  output logic [`FLEN-1:0]    Ans,                    // the correct answer
-  output logic [4:0]          AnsFlg,                 // the correct flag
-  output logic                XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
-  output logic [`NE-1:0]      XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
-  output logic [`NF:0]        XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
-  output logic                XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
-  output logic                XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                XDenormE, ZDenormE,   // is XYZ denormalized
-  output logic                XZeroE, YZeroE, ZZeroE,         // is XYZ zero
-  output logic                XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic [`FLEN-1:0]    X, Y, Z                 // inputs
-);
-
-  logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
-  // apply test vectors on rising edge of clk
-  // Format of vectors Inputs(1/2/3)_AnsFlg
-  always @(posedge clk) begin
-    #1; 
-    AnsFlg = TestVector[4:0];
-    case (FmaFmt)
-      2'b11: begin       // quad
-        X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
-        Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
-        Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
-        Ans = TestVector[8+(`Q_LEN-1):8];
-      end
-      2'b01:	begin	  // double
-          X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
-          Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
-          Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
-          Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
-      end
-      2'b00:	begin	  // single
-          X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
-          Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
-          Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
-          Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
-      end
-      2'b10:	begin	  // half
-          X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
-          Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
-          Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
-          Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
-      end
-    endcase
-  end
-  
-  unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
-                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
-                .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
-                .XExpMaxE, .ZDenormE);
-endmodule
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 module readvectors (
  input logic clk,
  input logic [`FLEN*4+7:0] TestVector,
@ -1451,33 +908,61 @@ module readvectors (
      `FMAUNIT:
        case (Fmt)
          2'b11: begin       // quad
-            X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
-            if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
+              Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
+              Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
+            end
+            else begin
+              X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
+              if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
+            end
            Ans = TestVector[8+(`Q_LEN-1):8];
          end
          2'b01:	begin	  // double
-            X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
-            if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; 
-            else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}; 
-            else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
+              Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
+              Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
+            end
+            else begin
+              X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
+              if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; 
+              else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}; 
+              else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
+            end
            Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
          end
          2'b00:	begin	  // single
-            X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
-            if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; 
-            else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}; 
-            else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
+              Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
+              Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
+            end
+            else begin
+              X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
+              if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]}; 
+              else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}; 
+              else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
+            end
            Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
          end
          2'b10:	begin	  // half
-            X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
-            if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; 
-            else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
-            if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}; 
-            else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
+            if(OpCtrl === `FMA_OPCTRL) begin
+              X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
+              Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
+              Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
+            end
+            else begin
+              X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
+              if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; 
+              else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
+              if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}; 
+              else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
+            end
            Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
          end
        endcase
@ -1532,19 +1017,19 @@ module readvectors (
          2'b11: begin       // quad
          case (OpCtrl[1:0])
            2'b11: begin       // quad
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
+              X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
              Ans = TestVector[8+(`Q_LEN-1):8];
            end
            2'b01:	begin	  // double
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
+              X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
              Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
            end
            2'b00:	begin	  // single
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
+              X = {TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
              Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
            end
            2'b10:	begin	  // half
-              X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
+              X = {TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
              Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
            end
          endcase
@ -1628,12 +1113,12 @@ module readvectors (
                Ans = TestVector[8+(`Q_LEN-1):8];
              end
              2'b01:	begin	  // quad -> long
-                X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
+                X = {TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
                SrcA = {`XLEN{1'bx}};
                Ans = {TestVector[8+(`XLEN-1):8]};
              end
              2'b00:	begin	  // quad -> int
-                X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
+                X = {TestVector[8+32+`Q_LEN-1:8+(32)]};
                SrcA = {`XLEN{1'bx}};
                Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
              end
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@ -396,6 +396,7 @@ module riscvassertions;
    assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
    assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
    assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
+    assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unless data cache is supported");
    assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
    assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
    assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
@ -418,6 +419,7 @@ module riscvassertions;
    //assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
    //assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");    
    assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
+    assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
  end
 endmodule

--- a/pipelined/testbench/tests-fp.vh
+++ b/pipelined/testbench/tests-fp.vh
@ -2,7 +2,7 @@
 `define ADD_OPCTRL 3'b110
 `define MUL_OPCTRL 3'b100
 `define SUB_OPCTRL 3'b111
-`define FADD_OPCTRL 3'b000
+`define FMA_OPCTRL 3'b000
 `define DIV_OPCTRL 3'b000
 `define SQRT_OPCTRL 3'b001
 `define LE_OPCTRL 3'b011
@ -21,11 +21,11 @@
 `define RU  3'b011
 `define RD  3'b010
 `define RNM 3'b100
-`define FMAUNIT 0
+`define FMAUNIT 2
 `define DIVUNIT 1
-`define CVTINTUNIT 2
-`define CVTFPUNIT 3
-`define CMPUNIT 4
+`define CVTINTUNIT 0
+`define CVTFPUNIT 4
+`define CMPUNIT 3

 string f16rv32cvtint[] = '{
 	"ui32_to_f16_rne.tv",
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@ -1102,11 +1102,11 @@ string imperas32f[] = '{
    // "rv64i_m/D/d_fdiv_b20-01", // looks like flags
    // "rv64i_m/D/d_fdiv_b2-01", // also flags
    // "rv64i_m/D/d_fdiv_b21-01", // positive NaNs again
-    "rv64i_m/D/d_fdiv_b3-01",
+    // "rv64i_m/D/d_fdiv_b3-01",
    // "rv64i_m/D/d_fdiv_b4-01", // flags
-    "rv64i_m/D/d_fdiv_b5-01",
+    // "rv64i_m/D/d_fdiv_b5-01",
    // "rv64i_m/D/d_fdiv_b6-01", // flags
-    "rv64i_m/D/d_fdiv_b7-01",
+    // "rv64i_m/D/d_fdiv_b7-01",
    // "rv64i_m/D/d_fdiv_b8-01", // flags
    // "rv64i_m/D/d_fdiv_b9-01",  might be a flag too
    "rv64i_m/D/d_feq_b1-01",
--- a/synthDC/ppaAnalyze.py
+++ b/synthDC/ppaAnalyze.py
@ -106,7 +106,7 @@ def getVals(tech, module, var, freq=None):

    if (freq != None):
        for oneSynth in allSynths:
-            if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module):
+            if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module) & (oneSynth.width != 1):
                widthL += [oneSynth.width]
                osdict = oneSynth._asdict()
                metric += [osdict[var]]
@ -151,33 +151,37 @@ def csvOfBest():
    file.close()
    return bestSynths
    
-def genLegend(fits, coefs, r2, spec, ale=False):
-    ''' generates a list of two legend elements 
-        labels line with fit equation and dots with tech and r squared of the fit
+def genLegend(fits, coefs, r2=None, spec=None, ale=False):
+    ''' builds the fit equation from coefs (one per term of 'clsgn' present in fits, in that order)
+        returns the equation string if r2 or spec is None, else two legend elements (eq line, R^2 dots)
    '''

-    coefsr = [str(round(c, 3)) for c in coefs]
-
-    eq = ''
-    ind = 0
-
-    eqDict = {'c': '', 'l': 'N', 's': '$N^2$', 'g': '$log_2$(N)', 'n': 'N$log_2$(N)'}
+    coefsr = [str(sigfig(c, 2)) for c in coefs]
+    # width symbol for the equation; default to N so sub is bound even if normAddWidth is not 32
+    sub = 'N'
    if ale:
        if (normAddWidth == 32):
-            eqDict = {'c': '', 'l': '(N/32)', 's': '$(N/32)^2$', 'g': '$log_2$(N/32)', 'n': '(N/32)$log_2$(N/32)'}
+            sub = 'S'
        elif normAddWidth != 1:
-            print('Legend equations are wrong')
+            print('Equations are wrong, check normAddWidth')
+
+    eqDict = {'c': '', 'l': sub, 's': '$'+sub+'^2$', 'g': '$log_2$('+sub+')', 'n': ''+sub+'$log_2$('+sub+')'}
+    eq = ''
+    ind = 0

    for k in eqDict.keys():
        if k in fits:
-            if str(coefsr[ind]) != '0.0': eq += " + " + coefsr[ind] + eqDict[k]
+            if str(coefsr[ind]) != '0': eq += " + " + coefsr[ind] + eqDict[k]
            ind += 1

    eq = eq[3:] # chop off leading ' + '

-    legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)]
-    legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label=spec.tech +'  $R^2$='+ str(round(r2, 4)))]
-    return legend_elements
+    if (r2 is None) or (spec is None):
+        return eq
+    else:
+        legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)]
+        legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label='$R^2$='+ str(round(r2, 4)))]
+        return legend_elements

 def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, color=None):
    ''' module: string module name
@ -197,9 +201,14 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
    allMetrics = []

    ale = (var != 'delay') # if not delay, must be area, leakage, or energy
-    modFit = fitDict[mod]
+    modFit = fitDict[module]
    fits = modFit[ale]

+    if freq:
+        ls = '--'
+    else:
+        ls = '-'
+
    for spec in techSpecs:
        metric = getVals(spec.tech, module, var, freq=freq)
        
@ -209,26 +218,26 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
            metric = [m/norm for m in metric]

        if len(metric) == 5: # don't include the spec if we don't have points for all widths
-            xp, pred, coefs, r2 = regress(widths, metric, fits)
+            xp, pred, coefs, r2 = regress(widths, metric, fits, ale)
            fullLeg += genLegend(fits, coefs, r2, spec, ale=ale)
            c = color if color else spec.color
            ax.scatter(widths, metric, color=c, marker=spec.shape)
-            ax.plot(xp, pred, color=c)
+            ax.plot(xp, pred, color=c, linestyle=ls)
            allWidths += widths
            allMetrics += metric

-    combined = TechSpec('combined', 'red', '_', 0, 0, 0, 0)
    xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits)
-    leg = genLegend(fits, coefs, r2, combined, ale=ale)
-    fullLeg += leg
-    ax.plot(xp, pred, color='red')
+    ax.plot(xp, pred, color='red', linestyle=ls)

    if norm:
        ylabeldic = {"lpower": "Leakage Power (add32)", "denergy": "Energy/Op (add32)", "area": "Area (add32)", "delay": "Delay (FO4)"}
    else:
        ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (fJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"}

-    ax.legend(handles=fullLeg)
+    # fullLeg += genLegend(fits, coefs, r2, combined, ale=ale)
+    # legLoc = 'upper left' if ale else 'center right'
+    # ax.add_artist(ax.legend(handles=fullLeg, loc=legLoc))
+
    ax.set_xticks(widths)
    ax.set_xlabel("Width (bits)")
    ax.set_ylabel(ylabeldic[var])
@ -243,15 +252,20 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
        ax.set_title(module + titleStr)
        plt.savefig('./plots/PPA/'+ module + '_' + var + '.png')
        # plt.show()
-    return fullLeg
+    return r2

-def regress(widths, var, fits='clsgn'):
+def regress(widths, var, fits='clsgn', ale=False):
    ''' fits a curve to the given points
-        returns lists of x and y values to plot that curve and legend elements with the equation
+        returns lists of x and y values to plot that curve and coefs for the eq with r2
    '''

    funcArr = genFuncs(fits)
-    widths = [w/normAddWidth for w in widths]
+    xp = np.linspace(4, 140, 200)
+    xpToCalc = xp
+
+    if ale:
+        widths = [w/normAddWidth for w in widths]
+        xpToCalc = [x/normAddWidth for x in xp]

    mat = []
    for w in widths:
@ -262,53 +276,91 @@ def regress(widths, var, fits='clsgn'):
    
    y = np.array(var, dtype=np.float)
    coefs = opt.nnls(mat, y)[0]
+
    yp = []
    for w in widths:
        n = [func(w) for func in funcArr]
        yp += [sum(np.multiply(coefs, n))]
    r2 = skm.r2_score(y, yp)

-    xp = np.linspace(4, 140, 200)
    pred = []
-    for x in xp:
-        n = [func(x/normAddWidth) for func in funcArr]
+    for x in xpToCalc:
+        n = [func(x) for func in funcArr]
        pred += [sum(np.multiply(coefs, n))]

    return xp, pred, coefs, r2

 def makeCoefTable():
-    ''' 
-        writes CSV with each line containing the coefficients for a regression fit 
+    ''' writes CSV with each line containing the coefficients for a regression fit 
        to a particular combination of module, metric (including both techs, normalized)
    '''
    file = open("ppaFitting.csv", "w")
    writer = csv.writer(file)
-    writer.writerow(['Module', 'Metric', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
+    writer.writerow(['Module', 'Metric', 'Target', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])

    for module in modules:
-        for var in ['delay', 'area', 'lpower', 'denergy']:
-            ale = (var != 'delay')
-            metL = []
-            modFit = fitDict[module]
-            fits = modFit[ale]
+        for freq in [10, None]: # each module is fitted twice, once per freq value passed to getVals
+            target = 'easy' if freq else 'hard' # label written to the Target column
+            for var in ['delay', 'area', 'lpower', 'denergy']:
+                ale = (var != 'delay')
+                metL = []
+                modFit = fitDict[module]
+                fits = modFit[ale] # ale is a bool index: False picks entry 0, True picks entry 1

-            for spec in techSpecs:
-                metric = getVals(spec.tech, module, var)
-                techdict = spec._asdict()
-                norm = techdict[var]
-                metL += [m/norm for m in metric]
+                for spec in techSpecs:
+                    metric = getVals(spec.tech, module, var, freq=freq)
+                    techdict = spec._asdict()
+                    norm = techdict[var] # per-tech divisor for this metric
+                    metL += [m/norm for m in metric]

-            xp, pred, coefs, r2 = regress(widths*2, metL, fits)
-            coefs = np.ndarray.tolist(coefs)
-            coefsToWrite  = [None]*5
-            fitTerms = 'clsgn'
-            ind = 0
-            for i in range(len(fitTerms)):
-                if fitTerms[i] in fits:
-                    coefsToWrite[i] = coefs[ind]
-                    ind += 1
-            row = [module, var] + coefsToWrite + [r2]
-            writer.writerow(row)
+                xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale) # NOTE(review): widths*2 assumes exactly two entries in techSpecs - confirm
+                coefs = np.ndarray.tolist(coefs)
+                coefsToWrite  = [None]*5 # one slot per term of fitTerms; terms absent from fits stay None
+                fitTerms = 'clsgn'
+                ind = 0
+                for i in range(len(fitTerms)):
+                    if fitTerms[i] in fits:
+                        coefsToWrite[i] = coefs[ind]
+                        ind += 1
+                row = [module, var, target] + coefsToWrite + [r2]
+                writer.writerow(row)
+
+    file.close()
+
+def sigfig(num, figs): # round num to figs significant figures, returned as a compact 'g'-formatted string
+    return format(float(format(num, '.' + str(figs) + 'g')), 'g')
+
+def makeEqTable():
+    ''' writes ppaEquations.csv with one row per module holding the fit equation for each metric
+        (both techs combined, normalized); delay is only fitted for the freq=None run
+    '''
+    file = open("ppaEquations.csv", "w", newline='') # csv module expects newline='' so it controls line endings itself
+    writer = csv.writer(file)
+    writer.writerow(['Element', 'Best delay', 'Fast area', 'Fast leakage', 'Fast energy', 'Small area', 'Small leakage', 'Small energy'])
+
+    for module in modules:
+        eqs = []
+        for freq in [None, 10]:
+            for var in ['delay', 'area', 'lpower', 'denergy']:
+                if (var == 'delay') and (freq == 10):
+                    continue # header has a single delay column ('Best delay'), so skip delay for the freq=10 run
+
+                ale = (var != 'delay')
+                modFit = fitDict[module]
+                fits = modFit[ale] # ale is a bool index: False picks entry 0, True picks entry 1
+
+                # collect this metric for every tech, each value divided by that tech's entry for var
+                metL = []
+                for spec in techSpecs:
+                    metric = getVals(spec.tech, module, var, freq=freq)
+                    norm = spec._asdict()[var]
+                    metL += [m/norm for m in metric]
+
+                # NOTE(review): widths*2 assumes exactly two entries in techSpecs - confirm
+                xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale)
+                coefs = np.ndarray.tolist(coefs)
+                eqs += [genLegend(fits, coefs, ale=ale)]
+        writer.writerow([module] + eqs)

    file.close()

@ -369,7 +421,7 @@ def freqPlot(tech, mod, width):
        delays = delaysL[ind]
        freqs = freqsL[ind]

-        freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses
+        # freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses

        c = 'blue' if ind else 'green'
        # adprod = adprodpow(areas, delays, 1)
@ -383,14 +435,18 @@ def freqPlot(tech, mod, width):
                       lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')]

    ax1.legend(handles=legend_elements)
+    width = str(width)
    
    ax2.set_xlabel("Target Freq (MHz)")
    ax1.set_ylabel('Delay (ns)')
    ax2.set_ylabel('Area (sq microns)')
    # ax3.set_ylabel('Area * Delay')
    # ax4.set_ylabel('Area * $Delay^2$')
-    ax1.set_title(mod + '_' + str(width))
-    plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + str(width) + '.png')
+    ax1.set_title(mod + '_' + width)
+    if ('mux' in mod) & ('d' in mod):
+        width = mod
+        mod = 'muxd'
+    plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + width + '.png')
    # plt.show()

 def squareAreaDelay(tech, mod, width):
@ -485,30 +541,35 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False):
    '''
    plt.rcParams["figure.figsize"] = (10,7)
    fig, axs = plt.subplots(2, 2)
-    # fig, axs = plt.subplots(4, 1)

-    # oneMetricPlot(mod, 'delay', ax=axs[0], fits=modFit[0], freq=freq, norm=norm)
-    # oneMetricPlot(mod, 'area', ax=axs[1], fits=modFit[1], freq=freq, norm=norm)
-    # oneMetricPlot(mod, 'lpower', ax=axs[2], fits=modFit[1], freq=freq, norm=norm)
-    # oneMetricPlot(mod, 'denergy', ax=axs[3], fits=modFit[1], freq=freq, norm=norm)
-    oneMetricPlot(mod, 'delay', ax=axs[0,0], freq=freq, norm=norm)
-    oneMetricPlot(mod, 'area', ax=axs[0,1], freq=freq, norm=norm)
-    oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=freq, norm=norm)
-    fullLeg = oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=freq, norm=norm)
+    arr = [['delay', 'area'], ['lpower', 'denergy']]
+
+    freqs = [freq]
+    if aleOpt: freqs += [10]
+
+    for i in [0, 1]:
+        for j in [0, 1]:
+            leg = []
+            for f in freqs:
+                if (arr[i][j]=='delay') and (f==10):
+                    pass
+                else:
+                    r2 = oneMetricPlot(mod, arr[i][j], ax=axs[i, j], freq=f, norm=norm)
+                    ls = '--' if f else '-'
+                    leg += [lines.Line2D([0], [0], color='red', label='$R^2$='+str(round(r2, 4)), linestyle=ls)]
+            axs[i, j].legend(handles=leg)
    
-    if aleOpt:
-        oneMetricPlot(mod, 'area', ax=axs[0,1], freq=10, norm=norm, color='black')
-        oneMetricPlot(mod, 'lpower', ax=axs[1,0], freq=10, norm=norm, color='black')
-        oneMetricPlot(mod, 'denergy', ax=axs[1,1], freq=10, norm=norm, color='black')
-    
-    titleStr = "  (target  " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
-    n = 'normalized' if norm else 'unnormalized'
-    saveStr = './plots/PPA/'+ n + '/' + mod + '.png'
+    titleStr = "  (target  " + str(freq)+ "MHz)" if freq != None else ""
    plt.suptitle(mod + titleStr)

-    # fig.legend(handles=fullLeg, ncol=3, loc='center', bbox_to_anchor=(0.3, 0.82, 0.4, 0.2))
+    fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')]
+    fullLeg += [lines.Line2D([0], [0], color='black', label='smallest', linestyle='--')]
+    fig.legend(handles=fullLeg, ncol=3, loc='center', bbox_to_anchor=(0.3, 0.82, 0.4, 0.2))

-    if freq != 10: plt.savefig(saveStr)
+    if freq != 10: 
+        n = 'normalized' if norm else 'unnormalized'
+        saveStr = './plots/PPA/'+ n + '/' + mod + '.png'
+        plt.savefig(saveStr)
    # plt.show()

 def plotBestAreas(mod):
@ -533,16 +594,17 @@ if __name__ == '__main__':
    ##############################
    # set up stuff, global variables
    widths = [8, 16, 32, 64, 128]
-    modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult']
+    modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult'] #, 'mux2d', 'mux4d', 'mux8d',]
    normAddWidth = 32 # divisor to use with N since normalizing to add_32

-    fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 's', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
+    fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 'ls', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
    fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l']))
    leftblue = [['mux2', 'sky90', 32], ['mux2', 'sky90', 64], ['mux2', 'sky90', 128], ['mux8', 'sky90', 32], ['mux2', 'tsmc28', 8], ['mux2', 'tsmc28', 64]] 

    TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy")
    techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1330.84, 582.81, 520.66],  ['tsmc28', 'blue', '^', 12.2e-3, 209.29, 1060, 81.43]]
    techSpecs = [TechSpec(*t) for t in techSpecs]
+    combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0)
    # invz1arealeakage = [['sky90', 1.96, 1.98], ['gf32', .351, .3116], ['tsmc28', .252, 1.09]] #['gf32', 'purple', 's', 15e-3]
    ##############################

@ -556,13 +618,14 @@ if __name__ == '__main__':
    # squareAreaDelay('sky90', 'add', 32)
    # oneMetricPlot('add', 'delay')
    # freqPlot('sky90', 'mux4', 16)
+    # plotBestAreas('add')
    # makeCoefTable()
+    # makeEqTable()
    
-    for mod in ['mux2']: #modules:
+    for mod in modules:
        plotPPA(mod, norm=False)
-        plotPPA(mod) #, aleOpt=True)
-        # plotBestAreas(mod)
-        # for w in [8, 16, 32, 64, 128]:
-        #     freqPlot('sky90', mod, w)
-        #     freqPlot('tsmc28', mod, w)
+        plotPPA(mod, aleOpt=True)
+        for w in [8, 16, 32, 64, 128]:
+            freqPlot('sky90', mod, w)
+            freqPlot('tsmc28', mod, w)
        plt.close('all')
--- a/synthDC/ppaEquations.csv
+++ b/synthDC/ppaEquations.csv
@ -0,0 +1,11 @@
+Element,Best delay,Fast area,Fast leakage,Fast energy,Small area,Small leakage,Small energy
+priorityencoder,0.98$log_2$(N),0.33S,0.25S,0.093S,0.15S,0.046S,0.00046S
+add,1.8 + 1.4$log_2$(N),1.1S,0.95S,1S,0.34S,0.16S,0.025S
+csa,3.6,0.93S,1.5S,1.1S,0.34S,0.16S,0.00055S
+shiftleft,0.48 + 1.6$log_2$(N),1.9S,2.3S,1.5S,0.8S,0.29S,0.0059S
+comparator,2 + 0.94$log_2$(N),0.6S,0.47S,0.31S,0.34S,0.16S,0.00089S
+flop,3.3,0.34S,0.37S,0.0012S,0.34S,0.37S,0.0012S
+mux2,2.8 + 0.38$log_2$(N),0.2S,0.18S,0.16S,0.15S,0.12S,0.0011S
+mux4,3.1 + 0.51$log_2$(N),0.36S,0.32S,0.28S,0.28S,0.11S,0.0021S
+mux8,5 + 0.45$log_2$(N),0.76S,0.66S,0.45S,0.55S,0.24S,0.0029S
+mult,6$log_2$(N),13S + 10$S^2$,26S + 7.3$S^2$,42S + 25$S^2$,1.1S + 7.9$S^2$,1S + 3.4$S^2$,2.1$S^2$
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@ -74,7 +74,7 @@ if { $saifpower == 1 } {
 if {$drive != "INV"} {
    set_false_path -from [get_ports reset]
 }
-if {(($::env(DESIGN) == "ppa_mux2_1") || ($::env(DESIGN) == "ppa_mux4_1") || ($::env(DESIGN) == "ppa_mux8_1"))} {
+if {(($::env(DESIGN) == "ppa_mux2d_1") || ($::env(DESIGN) == "ppa_mux4d_1") || ($::env(DESIGN) == "ppa_mux8d_1"))} {
    set_false_path -from {s}
 }

--- a/tests/fp/create_vectors.sh
+++ b/tests/fp/create_vectors.sh
@ -2,482 +2,482 @@
 BUILD="../../addins/TestFloat-3e/build/Linux-x86_64-GCC"
 OUTPUT="./vectors"
 echo "Creating ui32_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
 echo "Creating ui32_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
 echo "Creating ui32_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
 echo "Creating ui32_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
 echo "Creating ui64_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
 echo "Creating ui64_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
 echo "Creating ui64_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
 echo "Creating ui64_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
 echo "Creating i32_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
 echo "Creating i32_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
 echo "Creating i32_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
 echo "Creating i32_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
 echo "Creating i64_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
 echo "Creating i64_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
 echo "Creating i64_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
 echo "Creating i64_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
 echo "Creating f16_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
 echo "Creating f32_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
 echo "Creating f64_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
 echo "Creating f128_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
 echo "Creating f16_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
 echo "Creating f32_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
 echo "Creating f64_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
 echo "Creating f128_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
 echo "Creating f16_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
 echo "Creating f32_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
 echo "Creating f64_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
 echo "Creating f128_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
 echo "Creating f16_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
 echo "Creating f32_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
 echo "Creating f64_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
 echo "Creating f128_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
 echo "Creating f16_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
 echo "Creating f16_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
 echo "Creating f16_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
 echo "Creating f32_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
 echo "Creating f32_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
 echo "Creating f32_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
 echo "Creating f64_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
 echo "Creating f64_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
 echo "Creating f64_to_f128 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
 echo "Creating f128_to_f16 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
 echo "Creating f128_to_f32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
 echo "Creating f128_to_f64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
 echo "Creating f16_add vectors"
-$BUILD/testfloat_gen -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
-$BUILD/testfloat_gen -rminMag f16_add > $OUTPUT/f16_add_rz.tv
-$BUILD/testfloat_gen -rmax f16_add > $OUTPUT/f16_add_ru.tv
-$BUILD/testfloat_gen -rmin f16_add > $OUTPUT/f16_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_add > $OUTPUT/f16_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_add > $OUTPUT/f16_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_add > $OUTPUT/f16_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
 echo "Creating f32_add vectors"
-$BUILD/testfloat_gen -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
-$BUILD/testfloat_gen -rminMag f32_add > $OUTPUT/f32_add_rz.tv
-$BUILD/testfloat_gen -rmax f32_add > $OUTPUT/f32_add_ru.tv
-$BUILD/testfloat_gen -rmin f32_add > $OUTPUT/f32_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_add > $OUTPUT/f32_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_add > $OUTPUT/f32_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_add > $OUTPUT/f32_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
 echo "Creating f64_add vectors"
-$BUILD/testfloat_gen -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
-$BUILD/testfloat_gen -rminMag f64_add > $OUTPUT/f64_add_rz.tv
-$BUILD/testfloat_gen -rmax f64_add > $OUTPUT/f64_add_ru.tv
-$BUILD/testfloat_gen -rmin f64_add > $OUTPUT/f64_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_add > $OUTPUT/f64_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_add > $OUTPUT/f64_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_add > $OUTPUT/f64_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
 echo "Creating f128_add vectors"
-$BUILD/testfloat_gen -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
-$BUILD/testfloat_gen -rminMag f128_add > $OUTPUT/f128_add_rz.tv
-$BUILD/testfloat_gen -rmax f128_add > $OUTPUT/f128_add_ru.tv
-$BUILD/testfloat_gen -rmin f128_add > $OUTPUT/f128_add_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_add > $OUTPUT/f128_add_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_add > $OUTPUT/f128_add_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_add > $OUTPUT/f128_add_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
 echo "Creating f16_sub vectors"
-$BUILD/testfloat_gen -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
-$BUILD/testfloat_gen -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
-$BUILD/testfloat_gen -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
 echo "Creating f32_sub vectors"
-$BUILD/testfloat_gen -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
-$BUILD/testfloat_gen -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
-$BUILD/testfloat_gen -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
 echo "Creating f64_sub vectors"
-$BUILD/testfloat_gen -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
-$BUILD/testfloat_gen -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
-$BUILD/testfloat_gen -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
 echo "Creating f128_sub vectors"
-$BUILD/testfloat_gen -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
-$BUILD/testfloat_gen -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
-$BUILD/testfloat_gen -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
-$BUILD/testfloat_gen -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
 echo "Creating f16_mul vectors"
-$BUILD/testfloat_gen -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
-$BUILD/testfloat_gen -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
-$BUILD/testfloat_gen -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
 echo "Creating f32_mul vectors"
-$BUILD/testfloat_gen -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
-$BUILD/testfloat_gen -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
-$BUILD/testfloat_gen -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
 echo "Creating f64_mul vectors"
-$BUILD/testfloat_gen -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
-$BUILD/testfloat_gen -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
-$BUILD/testfloat_gen -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
 echo "Creating f128_mul vectors"
-$BUILD/testfloat_gen -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
-$BUILD/testfloat_gen -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
-$BUILD/testfloat_gen -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
-$BUILD/testfloat_gen -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
 echo "Creating f16_div vectors"
-$BUILD/testfloat_gen -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
-$BUILD/testfloat_gen -rminMag f16_div > $OUTPUT/f16_div_rz.tv
-$BUILD/testfloat_gen -rmax f16_div > $OUTPUT/f16_div_ru.tv
-$BUILD/testfloat_gen -rmin f16_div > $OUTPUT/f16_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_div > $OUTPUT/f16_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_div > $OUTPUT/f16_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_div > $OUTPUT/f16_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
 echo "Creating f32_div vectors"
-$BUILD/testfloat_gen -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
-$BUILD/testfloat_gen -rminMag f32_div > $OUTPUT/f32_div_rz.tv
-$BUILD/testfloat_gen -rmax f32_div > $OUTPUT/f32_div_ru.tv
-$BUILD/testfloat_gen -rmin f32_div > $OUTPUT/f32_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_div > $OUTPUT/f32_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_div > $OUTPUT/f32_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_div > $OUTPUT/f32_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
 echo "Creating f64_div vectors"
-$BUILD/testfloat_gen -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
-$BUILD/testfloat_gen -rminMag f64_div > $OUTPUT/f64_div_rz.tv
-$BUILD/testfloat_gen -rmax f64_div > $OUTPUT/f64_div_ru.tv
-$BUILD/testfloat_gen -rmin f64_div > $OUTPUT/f64_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_div > $OUTPUT/f64_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_div > $OUTPUT/f64_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_div > $OUTPUT/f64_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
 echo "Creating f128_div vectors"
-$BUILD/testfloat_gen -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
-$BUILD/testfloat_gen -rminMag f128_div > $OUTPUT/f128_div_rz.tv
-$BUILD/testfloat_gen -rmax f128_div > $OUTPUT/f128_div_ru.tv
-$BUILD/testfloat_gen -rmin f128_div > $OUTPUT/f128_div_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_div > $OUTPUT/f128_div_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_div > $OUTPUT/f128_div_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_div > $OUTPUT/f128_div_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
 echo "Creating f16_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
 echo "Creating f32_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
 echo "Creating f64_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
 echo "Creating f128_sqrt vectors"
-$BUILD/testfloat_gen -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
-$BUILD/testfloat_gen -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
-$BUILD/testfloat_gen -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
-$BUILD/testfloat_gen -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
 echo "Creating f16_eq vectors"
-$BUILD/testfloat_gen -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
-$BUILD/testfloat_gen -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
-$BUILD/testfloat_gen -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
 echo "Creating f32_eq vectors"
-$BUILD/testfloat_gen -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
-$BUILD/testfloat_gen -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
-$BUILD/testfloat_gen -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
 echo "Creating f64_eq vectors"
-$BUILD/testfloat_gen -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
-$BUILD/testfloat_gen -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
-$BUILD/testfloat_gen -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
 echo "Creating f128_eq vectors"
-$BUILD/testfloat_gen -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
-$BUILD/testfloat_gen -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
-$BUILD/testfloat_gen -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
-$BUILD/testfloat_gen -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
 echo "Creating f16_le vectors"
-$BUILD/testfloat_gen -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
-$BUILD/testfloat_gen -rminMag f16_le > $OUTPUT/f16_le_rz.tv
-$BUILD/testfloat_gen -rmax f16_le > $OUTPUT/f16_le_ru.tv
-$BUILD/testfloat_gen -rmin f16_le > $OUTPUT/f16_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_le > $OUTPUT/f16_le_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_le > $OUTPUT/f16_le_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_le > $OUTPUT/f16_le_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
 echo "Creating f32_le vectors"
-$BUILD/testfloat_gen -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
-$BUILD/testfloat_gen -rminMag f32_le > $OUTPUT/f32_le_rz.tv
-$BUILD/testfloat_gen -rmax f32_le > $OUTPUT/f32_le_ru.tv
-$BUILD/testfloat_gen -rmin f32_le > $OUTPUT/f32_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_le > $OUTPUT/f32_le_rz.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_le > $OUTPUT/f32_le_ru.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_le > $OUTPUT/f32_le_rd.tv
+$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
 echo "Creating f64_le vectors"
-$BUILD/testfloat_gen -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
-$BUILD/testfloat_gen -rminMag f64_le > $OUTPUT/f64_le_rz.tv
-$BUILD/testfloat_gen -rmax f64_le > $OUTPUT/f64_le_ru.tv
-$BUILD/testfloat_gen -rmin f64_le > $OUTPUT/f64_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f64_le > "$OUTPUT/f64_le_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f64_le > "$OUTPUT/f64_le_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f64_le > "$OUTPUT/f64_le_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f64_le > "$OUTPUT/f64_le_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f64_le > "$OUTPUT/f64_le_rnm.tv" # nearest, ties away from zero
 echo "Creating f128_le vectors"
-$BUILD/testfloat_gen -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
-$BUILD/testfloat_gen -rminMag f128_le > $OUTPUT/f128_le_rz.tv
-$BUILD/testfloat_gen -rmax f128_le > $OUTPUT/f128_le_ru.tv
-$BUILD/testfloat_gen -rmin f128_le > $OUTPUT/f128_le_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f128_le > "$OUTPUT/f128_le_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f128_le > "$OUTPUT/f128_le_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f128_le > "$OUTPUT/f128_le_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f128_le > "$OUTPUT/f128_le_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f128_le > "$OUTPUT/f128_le_rnm.tv" # nearest, ties away from zero
 echo "Creating f16_lt vectors"
-$BUILD/testfloat_gen -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
-$BUILD/testfloat_gen -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
-$BUILD/testfloat_gen -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f16_lt > "$OUTPUT/f16_lt_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f16_lt > "$OUTPUT/f16_lt_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f16_lt > "$OUTPUT/f16_lt_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f16_lt > "$OUTPUT/f16_lt_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f16_lt > "$OUTPUT/f16_lt_rnm.tv" # nearest, ties away from zero
 echo "Creating f32_lt vectors"
-$BUILD/testfloat_gen -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
-$BUILD/testfloat_gen -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
-$BUILD/testfloat_gen -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f32_lt > "$OUTPUT/f32_lt_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f32_lt > "$OUTPUT/f32_lt_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f32_lt > "$OUTPUT/f32_lt_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f32_lt > "$OUTPUT/f32_lt_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f32_lt > "$OUTPUT/f32_lt_rnm.tv" # nearest, ties away from zero
 echo "Creating f64_lt vectors"
-$BUILD/testfloat_gen -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
-$BUILD/testfloat_gen -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
-$BUILD/testfloat_gen -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f64_lt > "$OUTPUT/f64_lt_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f64_lt > "$OUTPUT/f64_lt_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f64_lt > "$OUTPUT/f64_lt_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f64_lt > "$OUTPUT/f64_lt_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f64_lt > "$OUTPUT/f64_lt_rnm.tv" # nearest, ties away from zero
 echo "Creating f128_lt vectors"
-$BUILD/testfloat_gen -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
-$BUILD/testfloat_gen -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
-$BUILD/testfloat_gen -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
-$BUILD/testfloat_gen -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f128_lt > "$OUTPUT/f128_lt_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f128_lt > "$OUTPUT/f128_lt_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f128_lt > "$OUTPUT/f128_lt_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f128_lt > "$OUTPUT/f128_lt_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f128_lt > "$OUTPUT/f128_lt_rnm.tv" # nearest, ties away from zero
 echo "Creating f16_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f16_mulAdd > "$OUTPUT/f16_mulAdd_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f16_mulAdd > "$OUTPUT/f16_mulAdd_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f16_mulAdd > "$OUTPUT/f16_mulAdd_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f16_mulAdd > "$OUTPUT/f16_mulAdd_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f16_mulAdd > "$OUTPUT/f16_mulAdd_rnm.tv" # nearest, ties away from zero
 echo "Creating f32_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f32_mulAdd > "$OUTPUT/f32_mulAdd_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f32_mulAdd > "$OUTPUT/f32_mulAdd_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f32_mulAdd > "$OUTPUT/f32_mulAdd_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f32_mulAdd > "$OUTPUT/f32_mulAdd_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f32_mulAdd > "$OUTPUT/f32_mulAdd_rnm.tv" # nearest, ties away from zero
 echo "Creating f64_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f64_mulAdd > "$OUTPUT/f64_mulAdd_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f64_mulAdd > "$OUTPUT/f64_mulAdd_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f64_mulAdd > "$OUTPUT/f64_mulAdd_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f64_mulAdd > "$OUTPUT/f64_mulAdd_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f64_mulAdd > "$OUTPUT/f64_mulAdd_rnm.tv" # nearest, ties away from zero
 echo "Creating f128_mulAdd vectors"
-$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
-$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
-$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
-$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f128_mulAdd > "$OUTPUT/f128_mulAdd_rne.tv" # nearest, ties to even
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f128_mulAdd > "$OUTPUT/f128_mulAdd_rz.tv" # toward zero
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f128_mulAdd > "$OUTPUT/f128_mulAdd_ru.tv" # toward +infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f128_mulAdd > "$OUTPUT/f128_mulAdd_rd.tv" # toward -infinity
+"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f128_mulAdd > "$OUTPUT/f128_mulAdd_rnm.tv" # nearest, ties away from zero