FMA parameterized and FMA testbench reworked

2025-02-11 06:05:49 +00:00 · 2022-03-19 19:39:03 +00:00 · 2022-03-19 19:39:03 +00:00 · e3d01c875b
commit e3d01c875b
parent d43e868e5f
23 changed files with 3927 additions and 412 deletions
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
--- a/pipelined/config/rv64fp/BTBPredictor.txt
+++ b/pipelined/config/rv64fp/BTBPredictor.txt
--- a/pipelined/config/rv64fp/twoBitPredictor.txt
+++ b/pipelined/config/rv64fp/twoBitPredictor.txt
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@ -0,0 +1,134 @@
 //////////////////////////////////////////
 // wally-config.vh
 //
 // Written: David_Harris@hmc.edu 4 January 2021
 // Modified: 
 //
 // Purpose: Specify which features are configured
 //          Macros to determine which modes are supported based on MISA
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 // include shared configuration
 `include "wally-shared.vh"
 `define FPGA 0
 `define QEMU 0
 `define DESIGN_COMPILER 0
 // RV32 or RV64: XLEN = 32 or 64
 `define XLEN 64
 // IEEE 754 compliance
 `define IEEE754 1
 // MISA RISC-V configuration per specification
 `define MISA (32'h00000104 | 1 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
 `define ZICSR_SUPPORTED 1
 `define ZIFENCEI_SUPPORTED 1
 `define COUNTERS 32
 `define ZICOUNTERS_SUPPORTED 1
 /// Microarchitectural Features
 `define UARCH_PIPELINED 1
 `define UARCH_SUPERSCALR 0
 `define UARCH_SINGLECYCLE 0
 `define DMEM `MEM_CACHE
 `define IMEM `MEM_CACHE
 `define VIRTMEM_SUPPORTED 1
 `define VECTORED_INTERRUPTS_SUPPORTED 1 
 // TLB configuration.  Entries should be a power of 2
 `define ITLB_ENTRIES 32
 `define DTLB_ENTRIES 32
 // Cache configuration.  Sizes should be a power of two
 // typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
 `define DCACHE_NUMWAYS 4
 `define DCACHE_WAYSIZEINBYTES 4096
 `define DCACHE_LINELENINBITS 256
 `define ICACHE_NUMWAYS 4
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_LINELENINBITS 256
 // Integer Divider Configuration
 // DIV_BITSPERCYCLE must be 1, 2, or 4
 `define DIV_BITSPERCYCLE 4
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
 // Address space
 `define RESET_VECTOR 64'h0000000080000000
 // Bus Interface width
 `define AHBW 64
 // Peripheral Physical Addresses
 // Peripheral memory space extends from BASE to BASE+RANGE
 // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
 // *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
 `define BOOTROM_SUPPORTED 1'b1
 `define BOOTROM_BASE   56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
 `define BOOTROM_RANGE  56'h00000FFF
 `define RAM_SUPPORTED 1'b1
 `define RAM_BASE       56'h80000000
 `define RAM_RANGE      56'h7FFFFFFF
 `define EXT_MEM_SUPPORTED 1'b0
 `define EXT_MEM_BASE       56'h80000000
 `define EXT_MEM_RANGE      56'h07FFFFFF
 `define CLINT_SUPPORTED 1'b1
 `define CLINT_BASE  56'h02000000
 `define CLINT_RANGE 56'h0000FFFF
 `define GPIO_SUPPORTED 1'b1
 `define GPIO_BASE   56'h10060000
 `define GPIO_RANGE  56'h000000FF
 `define UART_SUPPORTED 1'b1
 `define UART_BASE   56'h10000000
 `define UART_RANGE  56'h00000007
 `define PLIC_SUPPORTED 1'b1
 `define PLIC_BASE   56'h0C000000
 `define PLIC_RANGE  56'h03FFFFFF
 `define SDC_SUPPORTED 1'b0
 `define SDC_BASE   56'h00012100
 `define SDC_RANGE  56'h0000001F
 // Test modes
 // Tie GPIO outputs back to inputs
 `define GPIO_LOOPBACK_TEST 1
 // Hardware configuration
 `define UART_PRESCALE 1
 // Interrupt configuration
 `define PLIC_NUM_SRC 10
 // comment out the following if >=32 sources
 `define PLIC_NUM_SRC_LT_32
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 `define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
 `define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
 `define REPLAY 0
 `define HPTW_WRITES_SUPPORTED 0
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -50,10 +50,47 @@
 // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
 `define PMPCFG_ENTRIES (`PMP_ENTRIES/8)
 // Floating-point half-precision
 `define ZFH_SUPPORTED 0
 // Floating point constants for Quad, Double, Single, and Half precisions
 `define Q_LEN 128
 `define Q_NE 15
 `define Q_NF 112
 `define Q_BIAS 16383
 `define D_LEN 64
 `define D_NE 11
 `define D_NF 52
 `define D_BIAS 1023
 `define S_LEN 32
 `define S_NE 8
 `define S_NF 23
 `define S_BIAS 127
 `define H_LEN 16
 `define H_NE 5
 `define H_NF 10
 `define H_BIAS 15
 // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
-`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
+`define FLEN (`Q_SUPPORTED ? `Q_LEN  : `D_SUPPORTED ? `D_LEN  : `F_SUPPORTED ? `S_LEN  : `H_LEN)
-`define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
+`define NE   (`Q_SUPPORTED ? `Q_NE   : `D_SUPPORTED ? `D_NE   : `F_SUPPORTED ? `S_NE   : `H_NE)
-`define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
+`define NF   (`Q_SUPPORTED ? `Q_NF   : `D_SUPPORTED ? `D_NF   : `F_SUPPORTED ? `S_NF   : `H_NF)
 `define FMT  (`Q_SUPPORTED ? 3       : `D_SUPPORTED ? 1       : `F_SUPPORTED ? 0       : 2)
 `define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)
 // Floating point constants needed for FPU parameterization
 `define FPSIZES (`Q_SUPPORTED+`D_SUPPORTED+`F_SUPPORTED+`ZFH_SUPPORTED)
 `define LEN1  ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN  : `H_LEN)
 `define NE1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE  : `H_NE)
 `define NF1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF  : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
 `define FMT1  ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 1        : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 0       : 2)
 `define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS  : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS)
 `define LEN2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN   : `H_LEN)
 `define NE2   ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE   : `H_NE)
 `define NF2   ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF  : `H_NF)
 `define FMT2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 0        : 2)
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS  : `H_BIAS)
 // Disable spurious Verilator warnings
--- a/pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@ -1,10 +1,33 @@
-//`include "../../../config/old/rv64icfd/wally-config.vh"
+`include "../../../config/old/rv64icfd/wally-config.vh"
-`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
+// `define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : `F_SUPPORTED ? 32 : 16)
-`define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
+// `define NE   (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : `F_SUPPORTED ? 8 : 5)
-`define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
+// `define NF   (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : `F_SUPPORTED ? 23 : 10)
-`define XLEN 64
+// `define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2)
 // `define BIAS (`Q_SUPPORTED ? 16383 : `D_SUPPORTED ? 1023 : `F_SUPPORTED ? 127 : 15)
 // `define XLEN 64
 // `define IEEE754 1
 `define Q_SUPPORTED 1
 // `define D_SUPPORTED 0
 // `define F_SUPPORTED 0
 `define H_SUPPORTED 0
 `define FPSIZES ((`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 4 : (`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`D_SUPPORTED&`H_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 3 : (`Q_SUPPORTED&`D_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED) | (`D_SUPPORTED&`H_SUPPORTED) | (`F_SUPPORTED&`H_SUPPORTED) ? 2 : 1)
 `define LEN1  ((`D_SUPPORTED & (`FLEN !== 64)) ? 64   : (`F_SUPPORTED & (`FLEN !== 32)) ? 32  : 16)
 `define NE1   ((`D_SUPPORTED & (`FLEN !== 64)) ? 11   : (`F_SUPPORTED & (`FLEN !== 32)) ? 8   : 5)
 `define NF1   ((`D_SUPPORTED & (`FLEN !== 64)) ? 52   : (`F_SUPPORTED & (`FLEN !== 32)) ? 23  : 10)
 `define FMT1  ((`D_SUPPORTED & (`FLEN !== 64)) ? 1    : (`F_SUPPORTED & (`FLEN !== 32)) ? 0   : 2)
 `define BIAS1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 1023 : (`F_SUPPORTED & (`FLEN !== 32)) ? 127 : 15)
 `define LEN2  ((`F_SUPPORTED & (`LEN1 !== 32)) ? 32   : 16)
 `define NE2   ((`F_SUPPORTED & (`LEN1 !== 32)) ? 8    : 5)
 `define NF2   ((`F_SUPPORTED & (`LEN1 !== 32)) ? 23   : 10)
 `define FMT2  ((`F_SUPPORTED & (`LEN1 !== 32)) ? 0    : 2)
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 127  : 15)
 `define LEN3 16
 `define NE3 5//make constants for the constants ie 11/8/5 ect
 `define NF3 10 // always support less hten max - maybe halfs
 `define FMT3 2
 `define BIAS3 15
 module testbench3();
 logic [31:0] errors=0;
@ -15,33 +38,17 @@ module testbench3();
 logic 	[`FLEN-1:0]		ans;
 logic 	[7:0]	 	flags;
 logic 	[2:0]		FrmE;
- logic				FmtE;
+ logic	[`FPSIZES/3:0]			FmtE;
 logic  [`FLEN-1:0]      FMAResM;
 logic  [4:0]       FMAFlgM;
 integer fp;
 logic 	[2:0]		FOpCtrlE;
 logic 		[2*`NF+1:0]		ProdManE; 
 logic 		[3*`NF+5:0]		AlignedAddendE;	
 logic 		[`NE+1:0]		ProdExpE; 
 logic 					AddendStickyE;
 logic 					KillProdE; 
 // logic					XZeroE;
 // logic					YZeroE;
 // logic					ZZeroE;
 // logic					XDenormE;
 // logic					YDenormE;
 // logic					ZDenormE;
 // logic					XInfE;
 // logic					YInfE;
 // logic					ZInfE;
 // logic					XNaNE;
 // logic					YNaNE;
 // logic					ZNaNE;
 logic wnan;
 // logic XNaNE;
 // logic YNaNE;
 // logic ZNaNE;
 logic ansnan, clk;
@ -52,88 +59,86 @@ assign FOpCtrlE = 3'b0;
 // down - 010
 // up - 011
 // nearest max mag - 100  
-assign FrmE = 3'b000;
+assign FrmE = 3'b010;
-assign FmtE = 1'b1;
+assign FmtE = (`FPSIZES/3+1)'(1);
    logic  [`FLEN-1:0] X, Y, Z;
    // logic         FmtE;
    // logic  [2:0]  FOpCtrlE;
    logic        XSgnE, YSgnE, ZSgnE;
    logic [`NE-1:0] XExpE, YExpE, ZExpE;
-    logic [`NF-1:0] XFracE, YFracE, ZFracE;
+    logic [`NF:0] XManE, YManE, ZManE;
    logic        XAssumed1E, YAssumed1E, ZAssumed1E;
    logic XNormE;
    logic XExpMaxE;
    logic XNaNE, YNaNE, ZNaNE;
    logic XSNaNE, YSNaNE, ZSNaNE;
    logic XDenormE, YDenormE, ZDenormE;
    logic XZeroE, YZeroE, ZZeroE;
    logic [`NE-1:0] BiasE;
    logic XInfE, YInfE, ZInfE;
    logic XExpMaxE;
 //***rename to make significand = 1.frac m = significand
    logic           XFracZero, YFracZero, ZFracZero; // input fraction zero
    logic           XExpZero, YExpZero, ZExpZero; // input exponent zero
    logic [`FLEN-1:0]    Addend; // value to add (Z or zero)
-    logic           YExpMaxE, ZExpMaxE;  // input exponent all 1s
+    logic           YExpMaxE, ZExpMaxE, Mult;  // input exponent all 1s
-    assign Addend = FOpCtrlE[2] ? (`FLEN)'(0) : Z; // Z is only used in the FMA, and is set to Zero if a multiply opperation
+	assign Mult = 1'b0;
-    assign XSgnE = FmtE ? X[`FLEN-1] : X[31];
+  unpacking unpacking(.*);
    assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31];
    assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31];
-    assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
+// assign	wnan = XNaNE|YNaNE|ZNaNE; 
-    assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
+// assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0]; 
    assign ZExpE = FmtE ? Addend[62:52] : {Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; 
-    assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0};
+    if (`FPSIZES === 1) begin
-    assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0};
+      assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
-    assign ZFracE = FmtE ? Addend[`NF-1:0] : {Addend[22:0], 29'b0};
+      assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
    end else if (`FPSIZES === 2) begin                  
      assign ansnan = FmtE ? &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]) : &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
      assign wnan = FmtE ? &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]) : &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
    end else if (`FPSIZES === 3) begin
        always_comb begin
            case (FmtE)
                `FMT: begin                  
                  assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
                  assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
-    assign XAssumed1E = FmtE ? |X[62:52] : |X[30:23]; 
+                end
-    assign YAssumed1E = FmtE ? |Y[62:52] : |Y[30:23];
+                `FMT1: begin                    
-    assign ZAssumed1E = FmtE ? |Z[62:52] : |Z[30:23];
+                  assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
                  assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
-    assign XExpZero = ~XAssumed1E;
+                end
-    assign YExpZero = ~YAssumed1E;
+                `FMT2: begin
-    assign ZExpZero = ~ZAssumed1E;
+                    assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
                    assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
                end
                default: begin
                    assign ansnan = 0;
                    assign wnan = 0;
                end
            endcase
        end
-    assign XFracZero = ~|XFracE;
+    end else begin
-    assign YFracZero = ~|YFracE;
+        always_comb begin
-    assign ZFracZero = ~|ZFracE;
+            case (FmtE)
                `FMT: begin                  
                  assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
                  assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
-    assign XExpMaxE = FmtE ? &X[62:52] : &X[30:23];
+                end
-    assign YExpMaxE = FmtE ? &Y[62:52] : &Y[30:23];
+                `FMT1: begin                    
-    assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23];
+                  assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
                  assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
-    assign XNormE = ~(XExpMaxE|XExpZero);
+                end
-    
+                `FMT2: begin
-    assign XNaNE = XExpMaxE & ~XFracZero;
+                    assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
-    assign YNaNE = YExpMaxE & ~YFracZero;
+                    assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
-    assign ZNaNE = ZExpMaxE & ~ZFracZero;
+                end
-
+                `FMT3: begin
-    assign XSNaNE = XNaNE&~XFracE[`NF-1];
+                    assign ansnan = &ans[`LEN3-2:`NF3]&(|ans[`NF3-1:0]);
-    assign YSNaNE = YNaNE&~YFracE[`NF-1];
+                    assign wnan = &FMAResM[`LEN3-2:`NF3]&(|FMAResM[`NF3-1:0]);
-    assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
+                end
-
+            endcase
-    assign XDenormE = XExpZero & ~XFracZero;
+        end
-    assign YDenormE = YExpZero & ~YFracZero;
+    end
    assign ZDenormE = ZExpZero & ~ZFracZero;
    assign XInfE = XExpMaxE & XFracZero;
    assign YInfE = YExpMaxE & YFracZero;
    assign ZInfE = ZExpMaxE & ZFracZero;
    assign XZeroE = XExpZero & XFracZero;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;
    assign BiasE = 13'h3ff;
 assign	wnan = FmtE ? &FMAResM[`FLEN-2:`NF] & |FMAResM[`NF-1:0] : &FMAResM[30:23] & |FMAResM[22:0]; 
 // assign	XNaNE = FmtE ? &X[62:52] & |X[51:0] : &X[62:55] & |X[54:32]; 
 // assign	YNaNE = FmtE ? &Y[62:52] & |Y[51:0] : &Y[62:55] & |Y[54:32]; 
 // assign	ZNaNE = FmtE ? &Z[62:52] & |Z[51:0] : &Z[62:55] & |Z[54:32]; 
 assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22:0]; 
 // instantiate device under test
    logic [3*`NF+5:0]	SumE, SumM;       
@ -141,16 +146,16 @@ assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22
    logic 			    NegSumE, NegSumM;
    logic 			    ZSgnEffE, ZSgnEffM;
    logic 			    PSgnE, PSgnM;
-    logic [8:0]			NormCntE, NormCntM;
+    logic [$clog2(3*`NF+7)-1:0]			NormCntE, NormCntM;
-    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}),
+    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
                 .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                .ProdExpE, .AddendStickyE, .KillProdE); 
-fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
+fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
              //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
                .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
-               .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
+               .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM, .Mult);
 // produce clock
@ -168,61 +173,156 @@ fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZEx
 always @(posedge clk)
 begin
  #1; 
-  if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
+  if (`FPSIZES === 3 | `FPSIZES === 4) begin
-  else	begin	  X = {{32{1'b1}}, testvectors[vectornum][135:104]};
+    if (FmtE==2'b11) {X, Y, Z, ans, flags} = testvectors[vectornum];
-  		  Y = {{32{1'b1}}, testvectors[vectornum][103:72]};
+    else if (FmtE==2'b01)	begin	  
-  		  Z = {{32{1'b1}}, testvectors[vectornum][71:40]};
+      X = {{`FLEN-64{1'b1}}, testvectors[vectornum][263:200]};
-  		  ans = {{32{1'b1}}, testvectors[vectornum][39:8]};
+      Y = {{`FLEN-64{1'b1}}, testvectors[vectornum][199:136]};
-  		  flags = testvectors[vectornum][7:0];
+      Z = {{`FLEN-64{1'b1}}, testvectors[vectornum][135:72]};
      ans = {{`FLEN-64{1'b1}}, testvectors[vectornum][71:8]};
      flags = testvectors[vectornum][7:0];
    end
    else if (FmtE==2'b00)	begin	  
      X = {{`FLEN-32{1'b1}}, testvectors[vectornum][135:104]};
      Y = {{`FLEN-32{1'b1}}, testvectors[vectornum][103:72]};
      Z = {{`FLEN-32{1'b1}}, testvectors[vectornum][71:40]};
      ans = {{`FLEN-32{1'b1}}, testvectors[vectornum][39:8]};
      flags = testvectors[vectornum][7:0];
    end
    else	begin	  
      X = {{`FLEN-16{1'b1}}, testvectors[vectornum][71:56]};
      Y = {{`FLEN-16{1'b1}}, testvectors[vectornum][55:40]};
      Z = {{`FLEN-16{1'b1}}, testvectors[vectornum][39:24]};
      ans = {{`FLEN-16{1'b1}}, testvectors[vectornum][23:8]};
      flags = testvectors[vectornum][7:0];
    end
  end
  else begin
    if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
    else if (FmtE==1'b0)	begin	  
      X = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+4*(`LEN1)-1:8+3*(`LEN1)]};
      Y = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+3*(`LEN1)-1:8+2*(`LEN1)]};
      Z = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+2*(`LEN1)-1:8+(`LEN1)]};
      ans = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+(`LEN1-1):8]};
      flags = testvectors[vectornum][7:0];
    end
  end
 end
 // check results on falling edge of clk
  always @(negedge clk) begin
      if (`FPSIZES === 1 | `FPSIZES === 2) begin
        if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]}))  || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0]))))) begin
        //  fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
        // if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin
              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(XDenormE) $display( "xdenorm ");
          if(YDenormE) $display( "ydenorm ");
          if(ZDenormE) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf ");
          if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
          if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
          if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
              errors = errors + 1;
          //if (errors === 10)
          $stop;
          end
          if((FmtE==1'b0)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[`LEN1-2:0] === {X[`LEN1-2:`NF1],1'b1,X[`NF1-2:0]})) || (YNaNE && (FMAResM[`LEN1-2:0] === {Y[`LEN1-2:`NF1],1'b1,Y[`NF1-2:0]}))  || (ZNaNE && (FMAResM[`LEN1-2:0] === {Z[`LEN1-2:`NF1],1'b1,Z[`NF1-2:0]})) || (FMAResM[`LEN1-2:0] === ans[`LEN1-2:0]))) ))) begin
              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
              errors = errors + 1;
        // if (errors === 9)
          $stop;
          end
 end else begin
        if((FmtE==2'b11) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]}))  || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0]))))) begin
        //  fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
        // if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin
              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(XDenormE) $display( "xdenorm ");
          if(YDenormE) $display( "ydenorm ");
          if(ZDenormE) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf ");
          if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
          if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
          if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
              errors = errors + 1;
          //if (errors === 10)
          $stop;
          end
          if((FmtE==1'b01)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[64-2:0] === {X[64-2:52],1'b1,X[52-2:0]})) || (YNaNE && (FMAResM[64-2:0] === {Y[64-2:52],1'b1,Y[52-2:0]}))  || (ZNaNE && (FMAResM[64-2:0] === {Z[64-2:52],1'b1,Z[52-2:0]})) || (FMAResM[62:0] === ans[62:0]))) ))) begin
              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
              errors = errors + 1;
        // if (errors === 9)
          $stop;
          end
          if((FmtE==2'b00)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[32-2:0] === {X[32-2:23],1'b1,X[23-2:0]})) || (YNaNE && (FMAResM[32-2:0] === {Y[32-2:23],1'b1,Y[23-2:0]}))  || (ZNaNE && (FMAResM[32-2:0] === {Z[32-2:23],1'b1,Z[23-2:0]})) || (FMAResM[30:0] === ans[30:0]))) ))) begin
              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
              errors = errors + 1;
        // if (errors === 9)
          $stop;
          end
          if((FmtE==2'b10)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[16-2:0] === {X[16-2:10],1'b1,X[10-2:0]})) || (YNaNE && (FMAResM[16-2:0] === {Y[16-2:10],1'b1,Y[10-2:0]}))  || (ZNaNE && (FMAResM[16-2:0] === {Z[16-2:10],1'b1,Z[10-2:0]})) || (FMAResM[14:0] === ans[14:0]))) ))) begin
              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
              errors = errors + 1;
        // if (errors === 9)
          $stop;
          end
 end
 	if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~((XNaNE & (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) | (YNaNE & (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]}))  | (ZNaNE & (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) | (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin
  //  fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
 	// if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (FMAResM != ans))) begin
        $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
 		if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
 		if(XDenormE) $display( "xdenorm ");
 		if(YDenormE) $display( "ydenorm ");
 		if(ZDenormE) $display( "zdenorm ");
 		if(FMAFlgM[4] != 0) $display( "invld ");
 		if(FMAFlgM[2] != 0) $display( "ovrflw ");
 		if(FMAFlgM[1] != 0) $display( "unflw ");
 		if(FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=-inf ");
 		if(~FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=+inf ");
 		if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
 		if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
 		if(ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=-inf ");
 		if(~ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=+inf ");
 		if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ~ans[`NF-1]) $display( "ans=sigNaN ");
 		if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ans[`NF-1]) $display( "ans=qutNaN ");
        errors = errors + 1;
 	  //if (errors == 10)
 		$stop;
    end
    if((FmtE==1'b0)&(FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~(((XNaNE & (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) | (YNaNE & (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]}))  | (ZNaNE & (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) | (FMAResM[30:0] == ans[30:0]))) ))) begin
        $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
 		if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
 		if(~(|X[30:23]) & |X[22:0]) $display( "xdenorm ");
 		if(~(|Y[30:23]) & |Y[22:0]) $display( "ydenorm ");
 		if(~(|Z[30:23]) & |Z[22:0]) $display( "zdenorm ");
 		if(FMAFlgM[4] != 0) $display( "invld ");
 		if(FMAFlgM[2] != 0) $display( "ovrflw ");
 		if(FMAFlgM[1] != 0) $display( "unflw ");
 		if(FMAResM == 64'hFF80000000000000) $display( "FMAResM=-inf ");
 		if(FMAResM == 64'h7F80000000000000) $display( "FMAResM=+inf ");
 		if(&FMAResM[30:23] & |FMAResM[22:0] & ~FMAResM[22]) $display( "FMAResM=sigNaN ");
 		if(&FMAResM[30:23] & |FMAResM[22:0] & FMAResM[22] ) $display( "FMAResM=qutNaN ");
 		if(ans == 64'hFF80000000000000) $display( "ans=-inf ");
 		if(ans == 64'h7F80000000000000) $display( "ans=+inf ");
 		if(&ans[30:23] & |ans[22:0] & ~ans[22] ) $display( "ans=sigNaN ");
 		if(&ans[30:23] & |ans[22:0] & ans[22]) $display( "ans=qutNaN ");
        errors = errors + 1;
 	  if (errors == 10)
 		$stop;
    end
 vectornum = vectornum + 1;
 if (testvectors[vectornum] === 194'bx) begin
 $display("%d tests completed with %d errors", vectornum, errors);
--- a/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
+++ b/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
@ -1,3 +1,3 @@
-testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
+testfloat_gen f128_mulAdd -tininessafter -n 6133248 -rmin  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace
--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@ -42,6 +42,7 @@ module fcmp (
   //             - if negitive - no
   //             - if positive - yes
   // note: LT does -0 < 0
   //*** compare Exp and Man together
   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
   assign EQ = (FSrcXE == FSrcYE);
--- a/pipelined/src/fpu/fcvtfp.sv
+++ b/pipelined/src/fpu/fcvtfp.sv
@ -103,7 +103,7 @@ module cvtfp (
    assign LSBFrac = DSFrac[3];
-    always_comb begin
+    always_comb begin // ***remove guard bit
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac));//round to nearest even
@ -166,6 +166,7 @@ module cvtfp (
                {XSgnE, DSResExp, DSResFrac};
        // select the final result based on the opperation
        //*** in al units before putting into : ? put in a seperate signal
        assign CvtFpResE = FmtE ? {{32{1'b1}},DSRes} : {XSgnE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]};
    end else begin
        // select the double to single precision result
--- a/pipelined/src/fpu/fcvtint.sv
+++ b/pipelined/src/fpu/fcvtint.sv
@ -10,7 +10,6 @@ module fcvt (
    input logic             XNaNE,      // is X NaN 
    input logic             XInfE,      // is X infinity
    input logic             XDenormE,   // is X denormalized
    input logic [10:0]      BiasE,      // bias - depends on precision (max exponent/2)
    input logic [`XLEN-1:0] ForwardedSrcAE,      // integer input
    input logic [2:0]       FOpCtrlE,   // chooses which instruction is done (full list below)
    input logic [2:0]       FrmE,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
@ -70,7 +69,7 @@ module fcvt (
    assign Bits = Res64 ? 8'd64 : 8'd32;
    // calulate the unbiased exponent
-    assign ExpVal = {1'b0,XExpE} - {1'b0,BiasE} + {12'b0, XDenormE};
+    assign ExpVal = {1'b0,XExpE} - {1'b0, (11)'(`BIAS)} + {12'b0, XDenormE};
 ////////////////////////////////////////////////////////
@ -121,7 +120,7 @@ module fcvt (
    assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
    assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
-    always_comb begin
+    always_comb begin//*** remove guard bit
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -29,17 +29,12 @@
 `include "wally-config.vh"
 //  `define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
 //  `define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
 //  `define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
 //  `define XLEN 64
 //  `define IEEE754 1
 module fma(
    input logic                 clk,
    input logic                 reset,
    input logic                 FlushM,     // flush the memory stage
    input logic                 StallM,     // stall memory stage
-    input logic                 FmtE, FmtM, // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0] FmtE, FmtM, // precision 1 = double 0 = single
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic  [2:0]          FrmM,               // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic                 XSgnE, YSgnE, ZSgnE,    // input signs - execute stage
@ -75,7 +70,7 @@ module fma(
    logic 			    NegSumE, NegSumM;
    logic 			    ZSgnEffE, ZSgnEffM;
    logic 			    PSgnE, PSgnM;
-    logic [8:0]			NormCntE, NormCntM;
+    logic [$clog2(3*`NF+7)-1:0]			NormCntE, NormCntM;
    logic               Mult;
    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
@ -86,7 +81,7 @@ module fma(
    // E/M pipeline registers
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-    flopenrc #(16) EMRegFma4(clk, reset, FlushM, ~StallM, 
+    flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, 
                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0]},
                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult});
@ -98,6 +93,7 @@ module fma(
 endmodule
        //*** in all units, put values in a separate signal before using them in a ? : expression
 module fma1(
    input logic                 XSgnE, YSgnE, ZSgnE,    // input's signs
@ -106,7 +102,7 @@ module fma1(
    input logic                 XDenormE, YDenormE, ZDenormE, // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
-    input logic                 FmtE,       // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0] FmtE,       // precision 1 = double 0 = single
    output logic [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
@ -115,7 +111,7 @@ module fma1(
    output logic                InvZE,          // intert Z
    output logic                ZSgnEffE,       // the modified Z sign
    output logic                PSgnE,          // the product's sign
-    output logic [8:0]          NormCntE        // normalization shift cnt
+    output logic [$clog2(3*`NF+7)-1:0]          NormCntE        // normalization shift cnt
    );
    logic [`NE-1:0]     Denorm;             // value of a denormaized number based on precision
@ -157,37 +153,63 @@ module fma1(
    add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
-    loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NormCntE);
+    loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
    // Choose the positive sum and accompanying LZA result.
    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
    // assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
 endmodule
 module expadd(    
-    input  logic            FmtE,          // precision
+    input  logic [`FPSIZES/3:0] FmtE,          // precision
-    input  logic [`NE-1:0]  XExpE, YExpE,  // input exponents
+    input  logic [`NE-1:0]      XExpE, YExpE,  // input exponents
-    input  logic            XDenormE, YDenormE,    // are the inputs denormalized
+    input  logic                XDenormE, YDenormE,    // are the inputs denormalized
-    input  logic            XZeroE, YZeroE,        // are the inputs zero
+    input  logic                XZeroE, YZeroE,        // are the inputs zero
-    output logic [`NE-1:0]  XExpVal, YExpVal,      // Exponent value after taking into account denormals
+    output logic [`NE-1:0]      XExpVal, YExpVal,      // Exponent value after taking into account denormals
-    output logic [`NE-1:0]  Denorm,        // value of denormalized exponent
+    output logic [`NE-1:0]      Denorm,        // value of denormalized exponent
-    output logic [`NE+1:0]  ProdExpE       // product's exponent B^(1023)NE+2
+    output logic [`NE+1:0]      ProdExpE       // product's exponent B^(1023)NE+2
 );
+    // Purpose: pick the exponent value used for X and Y (Denorm when the input is
+    //          denormalized, the raw exponent otherwise) and form the product exponent
+    //          XExpVal + YExpVal - `BIAS, which is forced to zero when X or Y is zero.
 
     // denormalized numbers have different values depending on which precision it is.
-    //      double - 1
+    //      FLEN - 1
-    //      single - 1023-127+1 = 897
+    //      Other - BIAS - other bias + 1
-    assign Denorm = FmtE ? 1 : 897;
+    
     if (`FPSIZES == 1) begin
         assign Denorm = 1;
 
     end else if (`FPSIZES == 2) begin
         assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1);
 
     end else if (`FPSIZES == 3) begin
+        // plain blocking assignments are used here: the "assign" keyword inside
+        // always_comb would create a deprecated procedural continuous assignment
         always_comb begin
             case (FmtE)
                 `FMT: Denorm = 1;
                 `FMT1: Denorm = `BIAS-`BIAS1+1;
                 `FMT2: Denorm = `BIAS-`BIAS2+1;
                 default: Denorm = {`NE{1'bx}}; // unsupported format encoding: drive every bit to x
             endcase
         end
 
     end else begin
+        // all four 2-bit encodings are listed, so no default arm is needed
         always_comb begin
             case (FmtE)
                 2'h3: Denorm = 1;
                 2'h1: Denorm = `BIAS-`D_BIAS+1;
                 2'h0: Denorm = `BIAS-`S_BIAS+1;
                 2'h2: Denorm = `BIAS-`H_BIAS+1;
             endcase
         end
     end
 
     // pick denormalized value or exponent
     assign XExpVal = XDenormE ? Denorm : XExpE;
     assign YExpVal = YDenormE ? Denorm : YExpE;
     // kill the exponent if the product is zero - either X or Y is 0
-    assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, `NE'h3ff})&{`NE+2{~(XZeroE|YZeroE)}};
+    assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
 endmodule
@ -261,7 +283,7 @@ module align(
    //      - Denormal numbers have a diffrent exponent value depending on the precision
    assign ZExpVal = ZDenormE ? Denorm : ZExpE;
    // assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3);
-    assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - 1020+`NF - {2'b0, ZExpVal};
+    assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal};
    // Defualt Addition without shifting
    //          |   54'b0    |  106'b(product)  | 2'b0 |
@ -276,7 +298,7 @@ module align(
        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //  | addnend |
-        if ($signed(AlignCnt) < $signed(13'b0)) begin
+        if ($signed(AlignCnt) < $signed((`NE+2)'(0))) begin
            KillProdE = 1;
            ZManShifted = ZManPreShifted;
            AddendStickyE = ~(XZeroE|YZeroE);
@ -284,7 +306,7 @@ module align(
        // If the Addend is shifted right
        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //                                  | addnend |
-        end else if ($signed(AlignCnt)<=$signed(13'd3*13'd`NF+13'd4))  begin
+        end else if ($signed(AlignCnt)<=$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)))  begin
            KillProdE = 0;
            ZManShifted = ZManPreShifted >> AlignCnt;
            AddendStickyE = |(ZManShifted[`NF-1:0]);
@ -356,7 +378,7 @@ endmodule
 module loa( //https://ieeexplore.ieee.org/abstract/document/930098
    input logic  [3*`NF+6:0] A,     // addend
    input logic  [2*`NF+1:0] P,     // product
-    output logic [8:0]       NormCntE   // normalization shift count for the positive result
+    output logic [$clog2(3*`NF+7)-1:0]       NormCntE   // normalization shift count for the positive result
    ); 
    logic [3*`NF+6:0] T;
@ -389,14 +411,14 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
 endmodule
 module lzc(
-    input logic  [3*`NF+6:0]    f,
+    input logic  [3*`NF+6:0]            f,          // vector scanned from its top bit downward for the first 1
-    output logic [8:0]          NormCntE    // normalization shift
+    output logic [$clog2(3*`NF+7)-1:0]    NormCntE    // normalization shift
 );
+    // Counts how many zero bits sit above the first set bit of f; the count is
+    // 3*`NF+7 when no bit of f is set.
 
-    logic [8:0] i;
+    logic [$clog2(3*`NF+7)-1:0] i;
     always_comb begin
         i = 0;
+        // the bound check comes first and short-circuits, so f is never indexed past bit 0;
+        // the bound is one parenthesized size cast, matching the cast style used elsewhere
-        while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1;  // search for leading one
+        while (($unsigned(i) <= $unsigned(($clog2(3*`NF+7))'(3*`NF+6))) && ~f[3*`NF+6-i]) i = i+1;  // search for leading one
         NormCntE = i;
     end
 endmodule
@ -410,27 +432,27 @@ endmodule
 module fma2(
-    input logic                 XSgnM, YSgnM,        // input signs
+    input logic                             XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NE-1:0]               XExpM, YExpM, ZExpM, // input exponents
-    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
+    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [2:0]                   FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 FmtM,       // precision 1 = double 0 = single
+    input logic     [`FPSIZES/3:0]          FmtM,       // precision 1 = double 0 = single
-    input logic     [`NE+1:0]   ProdExpM,       // X exponent + Y exponent - bias
+    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
-    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                 XZeroM, YZeroM, ZZeroM, // inputs are zero
+    input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
+    input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic     [3*`NF+5:0] SumM,       // the positive sum
+    input logic     [3*`NF+5:0]             SumM,       // the positive sum
-    input logic                 NegSumM,    // was the sum negitive
+    input logic                             NegSumM,    // was the sum negitive
-    input logic                 InvZM,      // do you invert Z
+    input logic                             InvZM,      // do you invert Z
-    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                 PSgnM,      // the product's sign
+    input logic                             PSgnM,      // the product's sign
-    input logic                 Mult,       // multiply opperation
+    input logic                             Mult,       // multiply opperation
-    input logic     [8:0]       NormCntM,   // the normalization shift count
+    input logic     [$clog2(3*`NF+7)-1:0]   NormCntM,   // the normalization shift count
-    output logic    [`FLEN-1:0] FMAResM,    // FMA final result
+    output logic    [`FLEN-1:0]             FMAResM,    // FMA final result
-    output logic    [4:0]       FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
+    output logic    [4:0]                   FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -548,28 +570,27 @@ endmodule
 module normalize(
-    input logic  [3*`NF+5:0]    SumM,       // the positive sum
+    input logic  [3*`NF+5:0]            SumM,       // the positive sum
-    input logic  [`NE-1:0]      ZExpM,      // exponent of Z
+    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
-    input logic  [`NE+1:0]      ProdExpM,   // X exponent + Y exponent - bias
+    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
-    input logic  [8:0]          NormCntM,   // normalization shift count
+    input logic  [$clog2(3*`NF+7)-1:0]  NormCntM,   // normalization shift count
-    input logic                 FmtM,       // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0]         FmtM,       // precision 1 = double 0 = single
-    input logic                 KillProdM,  // is the product set to zero
+    input logic                         KillProdM,  // is the product set to zero
-    input logic                 AddendStickyM,  // the sticky bit caclulated from the aligned addend
+    input logic                         AddendStickyM,  // the sticky bit caclulated from the aligned addend
-    input logic                 NegSumM,    // was the sum negitive
+    input logic                         NegSumM,    // was the sum negitive
-    output logic [`NF+2:0]      NormSum,        // normalized sum
+    output logic [`NF+2:0]              NormSum,        // normalized sum
-    output logic                SumZero,        // is the sum zero
+    output logic                        SumZero,        // is the sum zero
-    output logic                NormSumSticky, UfSticky,    // sticky bits
+    output logic                        NormSumSticky, UfSticky,    // sticky bits
-    output logic [`NE+1:0]      SumExp,         // exponent of the normalized sum
+    output logic [`NE+1:0]              SumExp,         // exponent of the normalized sum
-    output logic                ResultDenorm    // is the result denormalized
+    output logic                        ResultDenorm    // is the result denormalized
 );
-    logic [`NE+1:0]     SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic [`NE+1:0]             SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
-    logic [8:0]         DenormShift;        // right shift if the result is denormalized //***change this later
+    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
-    logic [3*`NF+5:0]   CorrSumShifted;     // the shifted sum after LZA correction
+    logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
-    logic [3*`NF+8:0]   SumShifted;         // the shifted sum before LZA correction
+    logic [3*`NF+8:0]           SumShifted;         // the shifted sum before LZA correction
-    logic [`NE+1:0]     SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
+    logic [`NE+1:0]             SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
-    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
+    logic                       PreResultDenorm;    // is the result denormalized - calculated before LZA corection
-    logic               PreResultDenorm2;   // is the result denormalized - calculated before LZA corection
+    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
    logic               LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
@ -580,14 +601,89 @@ module normalize(
    // calculate the sum's exponent
    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
-    logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL;
+    //convert the sum's exponent into the proper precision
+    //  - the widest format keeps SumExpTmpTmp unchanged
+    //  - every narrower format subtracts `BIAS and adds its own bias; the mask
+    //    {`NE+2{|SumExpTmpTmp}} forces the result to zero when SumExpTmpTmp is zero
-    assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
+    if (`FPSIZES == 1) begin
-    assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd2)));
+        assign SumExpTmp = SumExpTmpTmp;
-    assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127);
+
-    assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd25+13'd1023-13'd127)) | ~|SumExpTmpTmp;
+    end else if (`FPSIZES == 2) begin
-    assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero;
+        assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
 
     end else if (`FPSIZES == 3) begin
+        // plain blocking assignments are used here: the "assign" keyword inside
+        // always_comb would create a deprecated procedural continuous assignment
         always_comb begin
             case (FmtM)
                 `FMT: SumExpTmp = SumExpTmpTmp;
                 `FMT1: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS1)&{`NE+2{|SumExpTmpTmp}};
                 `FMT2: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS2)&{`NE+2{|SumExpTmpTmp}};
                 default: SumExpTmp = {`NE+2{1'bx}}; // unsupported format encoding: drive every bit to x
             endcase
         end
 
     end else begin
+        // all four 2-bit encodings are listed, so no default arm is needed
         always_comb begin
             case (FmtM)
                 2'h3: SumExpTmp = SumExpTmpTmp;
                 2'h1: SumExpTmp = (SumExpTmpTmp-`BIAS+`D_BIAS)&{`NE+2{|SumExpTmpTmp}};
                 2'h0: SumExpTmp = (SumExpTmpTmp-`BIAS+`S_BIAS)&{`NE+2{|SumExpTmpTmp}};
                 2'h2: SumExpTmp = (SumExpTmpTmp-`BIAS+`H_BIAS)&{`NE+2{|SumExpTmpTmp}};
             endcase
         end
     end
    // determine if the result is denormalized
    if (`FPSIZES == 1) begin
        logic Sum0LEZ, Sum0GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
    end else if (`FPSIZES == 2) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
    end else if (`FPSIZES == 3) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
        always_comb begin
            case (FmtM)
                `FMT: assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                `FMT1: assign PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                `FMT2: assign PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
                default: assign PreResultDenorm = 1'bx;
            endcase
        end
    end else begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF  )-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
        assign Sum3LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
        assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
        always_comb begin
            case (FmtM)
                2'h3: assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                2'h1: assign PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                2'h0: assign PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
                2'h2: assign PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
            endcase
        end
    end
    // 010. when should be 001.
    //      - shift left one
@ -599,45 +695,66 @@ module normalize(
    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1;
+    assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
    // Normalize the sum
    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
    // LZA correction
    assign LZAPlus1 = SumShifted[3*`NF+7];
    assign LZAPlus2 = SumShifted[3*`NF+8];
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
-    assign CorrSumShifted =  LZAPlus1&~KillProdM ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
+    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
    // Calculate the sticky bit
-    assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM);
+    if (`FPSIZES == 1) begin
        assign NormSumSticky = |CorrSumShifted[2*`NF+2:0];
    end else if (`FPSIZES == 2) begin
        // 3*NF+5 - NF1 - 3
        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
        (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM);
    end else if (`FPSIZES == 3) begin
        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
        (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) | 
        (|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2));
    end else begin        
        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
        (|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | 
        (|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) |
        (|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2));
    end
    assign UfSticky = AddendStickyM | NormSumSticky;
    // Determine sum's exponent
    //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm2&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
+    assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
    // recalculate if the result is denormalized
-    assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
+    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
 endmodule
 module fmaround(
-    input logic             FmtM,       // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0] FmtM,       // precision 1 = double 0 = single
-    input logic  [2:0]      FrmM,       // rounding mode
+    input logic  [2:0]          FrmM,       // rounding mode
-    input logic             UfSticky,   // sticky bit for underlow calculation
+    input logic                 UfSticky,   // sticky bit for underlow calculation
-    input logic  [`NF+2:0]  NormSum,    // normalized sum
+    input logic  [`NF+2:0]      NormSum,    // normalized sum
-    input logic             AddendStickyM,  // addend's sticky bit
+    input logic                 AddendStickyM,  // addend's sticky bit
-    input logic             NormSumSticky,  // normalized sum's sticky bit
+    input logic                 NormSumSticky,  // normalized sum's sticky bit
-    input logic             ZZeroM,         // is Z zero
+    input logic                 ZZeroM,         // is Z zero
-    input logic             InvZM,          // invert Z
+    input logic                 InvZM,          // invert Z
-    input logic  [`NE+1:0]  SumExp,         // exponent of the normalized sum
+    input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
-    input logic             ResultSgnTmp,      // the result's sign
+    input logic                 ResultSgnTmp,      // the result's sign
-    output logic            CalcPlus1, UfPlus1,  // do you add or subtract on from the result
+    output logic                CalcPlus1, UfPlus1,  // do you add or subtract on from the result
-    output logic [`NE+1:0]  FullResultExp,      // ResultExp with bits to determine sign and overflow
+    output logic [`NE+1:0]      FullResultExp,      // ResultExp with bits to determine sign and overflow
-    output logic [`NF-1:0]  ResultFrac,         // Result fraction
+    output logic [`NF-1:0]      ResultFrac,         // Result fraction
-    output logic [`NE-1:0]  ResultExp,          // Result exponent
+    output logic [`NE-1:0]      ResultExp,          // Result exponent
-    output logic            Sticky,             // sticky bit
+    output logic                Sticky,             // sticky bit
-    output logic [`FLEN:0]  RoundAdd,           // how much to add to the result
+    output logic [`FLEN:0]      RoundAdd,           // how much to add to the result
-    output logic            Round, Guard, UfLSBNormSum // bits needed to calculate rounding
+    output logic                Round, Guard, UfLSBNormSum // bits needed to calculate rounding
 );
    logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
@ -676,18 +793,146 @@ module fmaround(
    //      101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
    //      110/111 - Plus1
-    // determine guard, round, and least significant bit of the result
+    if (`FPSIZES == 1) begin
-    assign Guard = FmtM ? NormSum[2] : NormSum[31];
+        // determine guard, round, and least significant bit of the result
-    assign Round = FmtM ? NormSum[1] : NormSum[30];
+        assign Guard = NormSum[2];
-    assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
+        assign Round = NormSum[1];
        assign LSBNormSum = NormSum[3];
        // used to determine underflow flag
        assign UfGuard = NormSum[1];
        assign UfRound = NormSum[0];
        assign UfLSBNormSum = NormSum[2];
        // determine sticky
        assign Sticky = UfSticky | NormSum[0];
    end else if (`FPSIZES == 2) begin
        //         \/-------------NF---------------,
        //      |      NF1       | 3 |             |
        //          '-------NF1------^
        // determine guard, round, and least significant bit of the result
        assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
        assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
        assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3];
        // used to determine underflow flag
        assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
        assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
        assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
        // determine sticky
        assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]);
    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (FmtM)
                `FMT: begin
                    // determine guard, round, and least significant bit of the result
                    assign Guard = NormSum[2];
                    assign Round = NormSum[1];
                    assign LSBNormSum = NormSum[3];
                    // used to determine underflow flag
                    assign UfGuard = NormSum[1];
                    assign UfRound = NormSum[0];
                    assign UfLSBNormSum = NormSum[2];
                    // determine sticky
                    assign Sticky = UfSticky | NormSum[0];
                end
                `FMT1: begin
                    // determine guard, round, and least significant bit of the result
                    assign Guard = NormSum[`NF-`NF1+2];
                    assign Round = NormSum[`NF-`NF1+1];
                    assign LSBNormSum = NormSum[`NF-`NF1+3];
                    // used to determine underflow flag
                    assign UfGuard = NormSum[`NF-`NF1+1];
                    assign UfRound = NormSum[`NF-`NF1];
                    assign UfLSBNormSum = NormSum[`NF-`NF1+2];
                    // determine sticky
                    assign Sticky = UfSticky | NormSum[`NF-`NF1];
                end
                `FMT2: begin
                    // determine guard, round, and least significant bit of the result
                    assign Guard = NormSum[`NF-`NF2+2];
                    assign Round = NormSum[`NF-`NF2+1];
                    assign LSBNormSum = NormSum[`NF-`NF2+3];
                    // used to determine underflow flag
                    assign UfGuard = NormSum[`NF-`NF2+1];
                    assign UfRound = NormSum[`NF-`NF2];
                    assign UfLSBNormSum = NormSum[`NF-`NF2+2];
                    // determine sticky
                    assign Sticky = UfSticky | NormSum[`NF-`NF2];
                end
                default: begin
                    assign Guard = 1'bx;
                    assign Round = 1'bx;
                    assign LSBNormSum = 1'bx;
                    assign UfGuard = 1'bx;
                    assign UfRound = 1'bx;
                    assign UfLSBNormSum = 1'bx;
                    assign Sticky = 1'bx;
                end
            endcase
        end
    end else begin
        always_comb begin
            case (FmtM)
                2'h3: begin
                    // determine guard, round, and least significant bit of the result
                    assign Guard = NormSum[2];
                    assign Round = NormSum[1];
                    assign LSBNormSum = NormSum[3];
                    // used to determine underflow flag
                    assign UfGuard = NormSum[1];
                    assign UfRound = NormSum[0];
                    assign UfLSBNormSum = NormSum[2];
                    // determine sticky
                    assign Sticky = UfSticky | NormSum[0];
                end
                2'h1: begin
                    // determine guard, round, and least significant bit of the result
                    assign Guard = NormSum[`NF-`D_NF+2];
                    assign Round = NormSum[`NF-`D_NF+1];
                    assign LSBNormSum = NormSum[`NF-`D_NF+3];
                    // used to determine underflow flag
                    assign UfGuard = NormSum[`NF-`D_NF+1];
                    assign UfRound = NormSum[`NF-`D_NF];
                    assign UfLSBNormSum = NormSum[`NF-`D_NF+2];
                    // determine sticky
                    assign Sticky = UfSticky | NormSum[`NF-`D_NF];
                end
                2'h0: begin
                    // determine guard, round, and least significant bit of the result
                    assign Guard = NormSum[`NF-`S_NF+2];
                    assign Round = NormSum[`NF-`S_NF+1];
                    assign LSBNormSum = NormSum[`NF-`S_NF+3];
                    // used to determine underflow flag
                    assign UfGuard = NormSum[`NF-`S_NF+1];
                    assign UfRound = NormSum[`NF-`S_NF];
                    assign UfLSBNormSum = NormSum[`NF-`S_NF+2];
                    // determine sticky
                    assign Sticky = UfSticky | NormSum[`NF-`S_NF];
                end
                2'h2: begin
                    // determine guard, round, and least significant bit of the result
                    assign Guard = NormSum[`NF-`H_NF+2];
                    assign Round = NormSum[`NF-`H_NF+1];
                    assign LSBNormSum = NormSum[`NF-`H_NF+3];
                    // used to determine underflow flag
                    assign UfGuard = NormSum[`NF-`H_NF+1];
                    assign UfRound = NormSum[`NF-`H_NF];
                    assign UfLSBNormSum = NormSum[`NF-`H_NF+2];
                    // determine sticky
                    assign Sticky = UfSticky | NormSum[`NF-`H_NF];
                end
            endcase
        end
    end
    // used to determine underflow flag
    assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
    assign UfRound = FmtM ? NormSum[0] : NormSum[29];
    assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];
    // determine sticky
    assign Sticky = UfSticky | NormSum[0];
    // Determine if a small number was supposed to be subtracted
    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
@ -729,10 +974,40 @@ module fmaround(
    assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
    // Compute rounded result
-    assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
+    if (`FPSIZES == 1) begin
-                             Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
+        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
    assign NormSumTruncated = {NormSum[`NF+2:32], NormSum[31:3]&{29{FmtM}}};
    end else if (`FPSIZES == 2) begin
        // \/FLEN+1
        //  | NE+2 |        NF      |
        //  '-NE+2-^----NF1----^
        // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
        assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
                                Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (FmtM)
                `FMT: assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
                `FMT1: assign RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
                `FMT2: assign RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
                default: assign RoundAdd = (`FLEN+1)'(0);
            endcase
        end
    end else begin        
        always_comb begin
            case (FmtM)
                2'h3: assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
                2'h1: assign RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
                2'h0: assign RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
                2'h2: assign RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
            endcase
        end
    end
    assign NormSumTruncated = NormSum[`NF+2:3];
    assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
    assign ResultExp = FullResultExp[`NE-1:0];
@ -748,7 +1023,7 @@ module fmaflags(
    input logic  [`NE+1:0]      SumExp,                 // exponent of the normalized sum
    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
    input logic                 Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
-    input logic                 FmtM,                   // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0] FmtM,                   // precision 1 = double 0 = single
    output logic                Invalid, Overflow, Underflow, // flags used to select the result
    output logic [4:0]          FMAFlgM // FMA flags
 );
@ -772,7 +1047,33 @@ module fmaflags(
    // Set Overflow flag if the number is too big to be represented
    //      - Don't set the overflow flag if an overflowed result isn't outputed    
-    assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[7:0] | FullResultExp[8];
+    if (`FPSIZES == 1) begin
        assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
    end else if (`FPSIZES == 2) begin
        assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (FmtM)
                `FMT: assign GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
                `FMT1: assign GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
                `FMT2: assign GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
                default: assign GtMaxExp = 1'bx;
            endcase
        end
    end else begin        
        always_comb begin
            case (FmtM)
                2'h3: assign GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
                2'h1: assign GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
                2'h0: assign GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
                2'h2: assign GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
            endcase
        end
    end
    assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
    // Set Underflow flag if the number is too small to be represented in normal numbers
@ -793,57 +1094,227 @@ endmodule
 module resultselect(
-    input logic                 XSgnM, YSgnM,        // input signs
+    input logic                     XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NE-1:0]       XExpM, YExpM, ZExpM, // input exponents
-    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
+    input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [2:0]           FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 FmtM,       // precision 1 = double 0 = single
+    input logic     [`FPSIZES/3:0]  FmtM,       // precision 1 = double 0 = single
-    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                 PSgnM,      // the product's sign
+    input logic                     PSgnM,      // the product's sign
-    input logic                 ResultSgn,  // the result's sign
+    input logic                     ResultSgn,  // the result's sign
-    input logic                 CalcPlus1,  // rounding bits
+    input logic                     CalcPlus1,  // rounding bits
-    input logic     [`FLEN:0]   RoundAdd,   // how much to add to the result
+    input logic     [`FLEN:0]       RoundAdd,   // how much to add to the result
-    input logic                 Invalid, Overflow, Underflow,  // flags
+    input logic                     Invalid, Overflow, Underflow,  // flags
-    input logic                 ResultDenorm,       // is the result denormalized
+    input logic                     ResultDenorm,       // is the result denormalized
-    input logic     [`NE-1:0]   ResultExp,          // Result exponent
+    input logic     [`NE-1:0]       ResultExp,          // Result exponent
-    input logic     [`NF-1:0]   ResultFrac,         // Result fraction
+    input logic     [`NF-1:0]       ResultFrac,         // Result fraction
-    output logic    [`FLEN-1:0] FMAResM     // FMA final result
+    output logic    [`FLEN-1:0]     FMAResM     // FMA final result
 );
-    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
+    logic               InfSgn;
    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
    assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
    if (`FPSIZES == 1) begin
        if(`IEEE754) begin
            assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
            assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
            assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
            assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
        end else begin
            assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
        end
        assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                    {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
        assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
        if(`IEEE754) begin
            assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
            assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
            assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
            assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
        end else begin 
            assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
        end
        assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                            {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                        ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                            {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (FmtM)
                `FMT: begin  
                    if(`IEEE754) begin
                        assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
                        assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
                        assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
                        assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end else begin 
                        assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end
                    assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
                    assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    assign NormResult = {ResultSgn, ResultExp, ResultFrac};
                end
                `FMT1: begin  
                    if(`IEEE754) begin
                        assign XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
                        assign YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
                        assign ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
                        assign InvalidResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                    end else begin 
                        assign XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                    end
                    assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
                    assign KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                    assign UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    assign InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
                    assign NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
                end
                `FMT2: begin  
                    if(`IEEE754) begin
                        assign XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
                        assign YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
                        assign ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
                        assign InvalidResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
                    end else begin 
                        assign XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
                    end
                    assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
                    assign KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:0], ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                    assign UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    assign InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
                    assign NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
                end
                default: begin
                    if(`IEEE754) begin
                        assign XNaNResult = (`FLEN)'(0);
                        assign YNaNResult = (`FLEN)'(0);
                        assign ZNaNResult = (`FLEN)'(0);
                        assign InvalidResult = (`FLEN)'(0);
                    end else begin 
                        assign XNaNResult = (`FLEN)'(0);
                    end
                    assign OverflowResult = (`FLEN)'(0);
                    assign KillProdResult = (`FLEN)'(0);
                    assign UnderflowResult = (`FLEN)'(0);
                    assign InfResult = (`FLEN)'(0);
                    assign NormResult = (`FLEN)'(0);
                end
            endcase
        end
    end else begin 
        always_comb begin
            case (FmtM)
                2'h3: begin  
                    if(`IEEE754) begin
                        assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
                        assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
                        assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
                        assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end else begin 
                        assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end
                    assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
                    assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    assign NormResult = {ResultSgn, ResultExp, ResultFrac};
                end
                2'h1: begin  
                    if(`IEEE754) begin
                        assign XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
                        assign YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
                        assign ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
                        assign InvalidResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                    end else begin 
                        assign XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                    end
                    assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
                    assign KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:0], ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                    assign UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    assign InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
                    assign NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
                end
                // FmtM == 2'h0: results built with the `S_* widths (presumably single precision - confirm against wally-config).
                // Every result is `FLEN bits wide: the upper `FLEN-`S_LEN bits are filled with ones and the
                // low `S_LEN bits hold {sign, `S_NE exponent bits, `S_NF fraction bits}.
                2'h0: begin
                    if(`IEEE754) begin
                        // NaN results keep the input's sign, force the top fraction bit to 1 and keep the remaining upper fraction bits
                        assign XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
                        assign YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
                        assign ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
                        assign InvalidResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
                    end else begin
                        // single NaN pattern with sign 0; only XNaNResult is driven in this configuration
                        assign XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
                    end
                    // overflow: largest finite magnitude when FrmM selects 01, 10 with a positive result, or 11 with a negative result;
                    // otherwise all-ones exponent with a zero fraction
                    assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
                    // killed product: Z re-encoded as {top exponent bit, low `S_NE-1 exponent bits, upper `S_NF fraction bits}
                    // (`S_LEN-1 bits total) plus the matching slice of RoundAdd, gated by AddendStickyM.
                    // The exponent slice must use `S_NE so its width matches the other `S_* fields of this arm.
                    assign KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:0], ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                    // underflow: signed zero, plus one in the lowest bit when CalcPlus1&(AddendStickyM|FrmM[1])
                    assign UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    // infinity: all-ones exponent, zero fraction, sign InfSgn
                    assign InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
                    // normal result: low `S_NE exponent bits and upper `S_NF fraction bits of the rounded result
                    assign NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
                end
                // FmtM == 2'h2: results built with the `H_* widths (presumably half precision - confirm against wally-config).
                // Every result is `FLEN bits wide: the upper `FLEN-`H_LEN bits are filled with ones and the
                // low `H_LEN bits hold {sign, `H_NE exponent bits, `H_NF fraction bits}.
                2'h2: begin  
                    if(`IEEE754) begin
                        // NaN results keep the input's sign, force the top fraction bit to 1 and keep the remaining upper fraction bits
                        assign XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
                        assign YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
                        assign ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
                        // NOTE(review): the sign here is a constant 1'b0, while the 2'h3, 2'h1 and 2'h0 arms use ResultSgn
                        // for InvalidResult in the IEEE754 branch - possibly a copy of the non-IEEE754 pattern below; confirm intent.
                        assign InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
                    end else begin 
                        // single NaN pattern with sign 0; only XNaNResult is driven in this configuration
                        assign XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
                    end
                    // overflow: largest finite magnitude when FrmM selects 01, 10 with a positive result, or 11 with a negative result;
                    // otherwise all-ones exponent with a zero fraction
                    assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
                                                                                                              {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
                    // killed product: Z re-encoded as {top exponent bit, low `H_NE-1 exponent bits, upper `H_NF fraction bits}
                    // plus the matching slice of RoundAdd, gated by AddendStickyM
                    assign KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:0], ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                    // underflow: signed zero, plus one in the lowest bit when CalcPlus1&(AddendStickyM|FrmM[1])
                    assign UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    // infinity: all-ones exponent, zero fraction, sign InfSgn
                    assign InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
                    // normal result: low `H_NE exponent bits and upper `H_NF fraction bits of the rounded result
                    assign NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
                end
            endcase
        end
    end
    if(`IEEE754) begin
-        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
+        assign FMAResM = XNaNM ? XNaNResult :
-        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
+                            YNaNM ? YNaNResult :
-        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
+                            ZNaNM ? ZNaNResult :
-        assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
+                            Invalid ? InvalidResult :
-      end else begin
+                            XInfM|YInfM|ZInfM ? InfResult :
-        assign XNaNResult = FmtM ? {1'b0, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpM[7:0], 1'b1, 22'b0};
+                            KillProdM ? KillProdResult :  
-        assign YNaNResult = FmtM ? {1'b0, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpM[7:0], 1'b1, 22'b0};
+                            Overflow ? OverflowResult :
-        assign ZNaNResult = FmtM ? {1'b0, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, ZExpM[7:0], 1'b1, 22'b0};
+                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-        assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, 1'b0, 8'hff, 1'b1, 22'b0};
+                            NormResult;
    end else begin
        assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
                            XInfM|YInfM|ZInfM ? InfResult :
                            KillProdM ? KillProdResult :  
                            Overflow ? OverflowResult :
                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
                            NormResult;
    end
    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} + (RoundAdd[59:29]&{31{AddendStickyM}})};
    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
    assign FMAResM = XNaNM ? XNaNResult :
                        YNaNM ? YNaNResult :
                        ZNaNM ? ZNaNResult :
                        Invalid ? InvalidResult :
                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
                        KillProdM ? KillProdResult :  
 			            Overflow ? OverflowResult :
                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
 endmodule
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -89,7 +89,6 @@ module fpu (
   logic [10:0] 	  XExpM, YExpM, ZExpM;                // input's exponent - memory stage
   logic [52:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
   logic [52:0] 	  XManM, YManM, ZManM;                // input's fraction - memory stage
   logic [10:0] 	  BiasE;                              // bias based on precision (single=7f double=3ff)
   logic 		  XNaNE, YNaNE, ZNaNE;                // is the input a NaN - execute stage
   logic 		  XNaNM, YNaNM, ZNaNM;                // is the input a NaN - memory stage
   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
@ -179,7 +178,7 @@ module fpu (
   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, 
         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
-         .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
+         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
   // FMA
   //   - two stage FMA
@ -231,7 +230,7 @@ module fpu (
         .XSNaNE, .ClassResE);
   // Convert
-   fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
+   fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
   .CvtResE, .CvtFlgE);
   // data to be stored in memory - to IEU
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@ -0,0 +1,361 @@
 `include "wally-config.vh"
 // unpack: split the three floating-point operands X, Y and Z into sign, exponent and
 //   mantissa, and classify each one (zero / denormal / infinity / NaN / signaling NaN).
 //
 //   The exponent and fraction outputs always use the widest supported layout
 //   (`NE exponent bits, `NF fraction bits). Narrower formats are converted as follows:
 //     - fraction: left-justified, padded with zeros on the right
 //     - exponent: re-biased by keeping the MSB, keeping the low bits, and filling the
 //       bits in between (see the worked example in the two-format branch)
 //     - NaN boxing: a narrower operand whose upper bits are not all ones is replaced by
 //       a positive quiet NaN of that narrower format
 //
 //   `FPSIZES selects, at elaboration time, how many formats are supported (1, 2, 3 or 4).
 module unpack ( 
    input logic  [`FLEN-1:0] X, Y, Z,                   // operands, FLEN bits each
    input logic  [`FPSIZES/3:0]       FmtE,             // format select
    input logic  [2:0]  FOpCtrlE,                       // not referenced inside this module
    output logic        XSgnE, YSgnE, ZSgnE,            // sign bit
    output logic [`NE-1:0] XExpE, YExpE, ZExpE,         // exponent in the widest layout
    output logic [`NF:0] XManE, YManE, ZManE,           // {exponent-nonzero bit, fraction}
    output logic XNormE,                                // X exponent is neither all zeros nor all ones
    output logic XNaNE, YNaNE, ZNaNE,                   // operand is a NaN
    output logic XSNaNE, YSNaNE, ZSNaNE,                // operand is a signaling NaN
    output logic XDenormE, YDenormE, ZDenormE,          // operand is denormalized
    output logic XZeroE, YZeroE, ZZeroE,                // operand is zero
    output logic XInfE, YInfE, ZInfE,                   // operand is infinity
    output logic XExpMaxE                               // X exponent is all ones
 );
    logic [`NF-1:0] XFracE, YFracE, ZFracE;             // fraction in the widest layout
    logic           XExpNonzero, YExpNonzero, ZExpNonzero; // exponent (in its own format) has a bit set
    logic           XFracZero, YFracZero, ZFracZero;    // fraction is all zeros
    logic           XExpZero, YExpZero, ZExpZero;       // exponent (in its own format) is all zeros
    logic           YExpMaxE, ZExpMaxE;                 // exponent (in its own format) is all ones

    if (`FPSIZES == 1) begin
        // Single supported format: the operand already has the widest layout.
        assign XSgnE = X[`FLEN-1];
        assign YSgnE = Y[`FLEN-1];
        assign ZSgnE = Z[`FLEN-1];

        assign XExpE = X[`FLEN-2:`NF]; 
        assign YExpE = Y[`FLEN-2:`NF]; 
        assign ZExpE = Z[`FLEN-2:`NF]; 

        assign XFracE = X[`NF-1:0];
        assign YFracE = Y[`NF-1:0];
        assign ZFracE = Z[`NF-1:0];

        assign XExpNonzero = |XExpE; 
        assign YExpNonzero = |YExpE;
        assign ZExpNonzero = |ZExpE;

        assign XExpMaxE = &XExpE;
        assign YExpMaxE = &YExpE;
        assign ZExpMaxE = &ZExpE;

    end else if (`FPSIZES == 2) begin
        // Two supported formats: FmtE = 1 selects the widest one, FmtE = 0 the `LEN1-bit one.
        logic  [`LEN1-1:0]   XLen1, YLen1, ZLen1; // narrow operand, or quiet NaN if not properly NaN boxed

        // NaN-boxing check: the bits above the narrow value must all be ones
        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
        assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
        assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};  

        assign XSgnE = FmtE ? X[`FLEN-1] : XLen1[`LEN1-1];
        assign YSgnE = FmtE ? Y[`FLEN-1] : YLen1[`LEN1-1];
        assign ZSgnE = FmtE ? Z[`FLEN-1] : ZLen1[`LEN1-1];

        // Exponent re-biasing, shown for single -> double:
        //   1023 = 0011 1111 1111
        //   127  = 0000 0111 1111 (subtract this)
        //   896  = 0011 1000 0000
        //   sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
        //   dexp = 0bdd dbbb bbbb 
        // The fill bits are forced to 0 for a zero exponent (zero/denormal) and to 1 for an
        // all-ones exponent (infinity/NaN) so those encodings stay special after widening.
        assign XExpE = FmtE ? X[`FLEN-2:`NF] : {XLen1[`LEN1-2], {`NE-`NE1{(~XLen1[`LEN1-2] & XExpNonzero) | XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; 
        assign YExpE = FmtE ? Y[`FLEN-2:`NF] : {YLen1[`LEN1-2], {`NE-`NE1{(~YLen1[`LEN1-2] & YExpNonzero) | YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; 
        assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : {ZLen1[`LEN1-2], {`NE-`NE1{(~ZLen1[`LEN1-2] & ZExpNonzero) | ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; 

        // narrow fractions are left-justified and zero padded
        assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
        assign YFracE = FmtE ? Y[`NF-1:0] : {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
        assign ZFracE = FmtE ? Z[`NF-1:0] : {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};

        assign XExpNonzero = FmtE ? |X[`FLEN-2:`NF] : |XLen1[`LEN1-2:`NF1]; 
        assign YExpNonzero = FmtE ? |Y[`FLEN-2:`NF] : |YLen1[`LEN1-2:`NF1];
        assign ZExpNonzero = FmtE ? |Z[`FLEN-2:`NF] : |ZLen1[`LEN1-2:`NF1];

        assign XExpMaxE = FmtE ? &X[`FLEN-2:`NF] : &XLen1[`LEN1-2:`NF1];
        assign YExpMaxE = FmtE ? &Y[`FLEN-2:`NF] : &YLen1[`LEN1-2:`NF1];
        assign ZExpMaxE = FmtE ? &Z[`FLEN-2:`NF] : &ZLen1[`LEN1-2:`NF1];

    end else if (`FPSIZES == 3) begin
        // Three supported formats, selected by FmtE against `FMT / `FMT1 / `FMT2.
        logic  [`LEN1-1:0]   XLen1, YLen1, ZLen1; // `LEN1-bit operand, or quiet NaN if not properly NaN boxed
        logic  [`LEN2-1:0]   XLen2, YLen2, ZLen2; // `LEN2-bit operand, or quiet NaN if not properly NaN boxed

        // NaN-boxing check: the bits above the narrow value must all be ones
        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
        assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
        assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; 

        assign XLen2 = &X[`FLEN-1:`LEN2] ? X[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
        assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
        assign ZLen2 = &Z[`FLEN-1:`LEN2] ? Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; 

        // Plain blocking assignments are used here. Inside each case item the exponent
        // flags are computed before the widened exponent, because the exponent fill bits
        // read them and always_comb does not re-trigger on variables it writes itself.
        always_comb begin
            case (FmtE)
                `FMT: begin // widest format: fields are used as they are
                    XSgnE = X[`FLEN-1];
                    YSgnE = Y[`FLEN-1];
                    ZSgnE = Z[`FLEN-1];

                    XFracE = X[`NF-1:0];
                    YFracE = Y[`NF-1:0];
                    ZFracE = Z[`NF-1:0];

                    XExpNonzero = |X[`FLEN-2:`NF]; 
                    YExpNonzero = |Y[`FLEN-2:`NF];
                    ZExpNonzero = |Z[`FLEN-2:`NF];

                    XExpMaxE = &X[`FLEN-2:`NF];
                    YExpMaxE = &Y[`FLEN-2:`NF];
                    ZExpMaxE = &Z[`FLEN-2:`NF];

                    XExpE = X[`FLEN-2:`NF]; 
                    YExpE = Y[`FLEN-2:`NF]; 
                    ZExpE = Z[`FLEN-2:`NF]; 
                end
                `FMT1: begin // `LEN1-bit format
                    XSgnE = XLen1[`LEN1-1];
                    YSgnE = YLen1[`LEN1-1];
                    ZSgnE = ZLen1[`LEN1-1];

                    XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
                    YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
                    ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};

                    XExpNonzero = |XLen1[`LEN1-2:`NF1]; 
                    YExpNonzero = |YLen1[`LEN1-2:`NF1];
                    ZExpNonzero = |ZLen1[`LEN1-2:`NF1];

                    XExpMaxE = &XLen1[`LEN1-2:`NF1];
                    YExpMaxE = &YLen1[`LEN1-2:`NF1];
                    ZExpMaxE = &ZLen1[`LEN1-2:`NF1];

                    // re-bias: keep MSB, fill the middle bits, keep the low bits
                    XExpE = {XLen1[`LEN1-2], {`NE-`NE1{(~XLen1[`LEN1-2] & XExpNonzero) | XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; 
                    YExpE = {YLen1[`LEN1-2], {`NE-`NE1{(~YLen1[`LEN1-2] & YExpNonzero) | YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; 
                    ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{(~ZLen1[`LEN1-2] & ZExpNonzero) | ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; 
                end
                `FMT2: begin // `LEN2-bit format
                    XSgnE = XLen2[`LEN2-1];
                    YSgnE = YLen2[`LEN2-1];
                    ZSgnE = ZLen2[`LEN2-1];

                    XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
                    YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)};
                    ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)};

                    XExpNonzero = |XLen2[`LEN2-2:`NF2]; 
                    YExpNonzero = |YLen2[`LEN2-2:`NF2];
                    ZExpNonzero = |ZLen2[`LEN2-2:`NF2];

                    XExpMaxE = &XLen2[`LEN2-2:`NF2];
                    YExpMaxE = &YLen2[`LEN2-2:`NF2];
                    ZExpMaxE = &ZLen2[`LEN2-2:`NF2];

                    // re-bias: keep MSB, fill the middle bits, keep the low bits
                    XExpE = {XLen2[`LEN2-2], {`NE-`NE2{(~XLen2[`LEN2-2] & XExpNonzero) | XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; 
                    YExpE = {YLen2[`LEN2-2], {`NE-`NE2{(~YLen2[`LEN2-2] & YExpNonzero) | YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; 
                    ZExpE = {ZLen2[`LEN2-2], {`NE-`NE2{(~ZLen2[`LEN2-2] & ZExpNonzero) | ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; 
                end
                default: begin // unused format encoding: drive everything to zero
                    XSgnE = 0;
                    YSgnE = 0;
                    ZSgnE = 0;

                    XFracE = 0;
                    YFracE = 0;
                    ZFracE = 0;

                    XExpNonzero = 0; 
                    YExpNonzero = 0;
                    ZExpNonzero = 0;

                    XExpMaxE = 0;
                    YExpMaxE = 0;
                    ZExpMaxE = 0;

                    XExpE = 0; 
                    YExpE = 0;
                    ZExpE = 0; 
                end
            endcase
        end

    end else begin
        // Four supported formats: 2'b11 = widest, 2'b01 = double, 2'b00 = single, 2'b10 = half.
        logic  [`D_LEN-1:0]   XLen1, YLen1, ZLen1; // double operand, or quiet NaN if not properly NaN boxed
        logic  [`S_LEN-1:0]   XLen2, YLen2, ZLen2; // single operand, or quiet NaN if not properly NaN boxed
        logic  [`H_LEN-1:0]   XLen3, YLen3, ZLen3; // half operand, or quiet NaN if not properly NaN boxed

        // NaN-boxing check: the bits above the narrow value must all be ones
        assign XLen1 = &X[`FLEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
        assign YLen1 = &Y[`FLEN-1:`D_LEN] ? Y[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
        assign ZLen1 = &Z[`FLEN-1:`D_LEN] ? Z[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; 

        assign XLen2 = &X[`FLEN-1:`S_LEN] ? X[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
        assign YLen2 = &Y[`FLEN-1:`S_LEN] ? Y[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
        assign ZLen2 = &Z[`FLEN-1:`S_LEN] ? Z[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; 

        assign XLen3 = &X[`FLEN-1:`H_LEN] ? X[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
        assign YLen3 = &Y[`FLEN-1:`H_LEN] ? Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
        assign ZLen3 = &Z[`FLEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; 

        // Plain blocking assignments are used here. Inside each case item the exponent
        // flags are computed before the widened exponent, because the exponent fill bits
        // read them and always_comb does not re-trigger on variables it writes itself.
        always_comb begin
            case (FmtE)
                2'b11: begin // widest format: fields are used as they are
                    XSgnE = X[`FLEN-1];
                    YSgnE = Y[`FLEN-1];
                    ZSgnE = Z[`FLEN-1];

                    XFracE = X[`NF-1:0];
                    YFracE = Y[`NF-1:0];
                    ZFracE = Z[`NF-1:0];

                    XExpNonzero = |X[`FLEN-2:`NF]; 
                    YExpNonzero = |Y[`FLEN-2:`NF];
                    ZExpNonzero = |Z[`FLEN-2:`NF];

                    XExpMaxE = &X[`FLEN-2:`NF];
                    YExpMaxE = &Y[`FLEN-2:`NF];
                    ZExpMaxE = &Z[`FLEN-2:`NF];

                    XExpE = X[`FLEN-2:`NF]; 
                    YExpE = Y[`FLEN-2:`NF]; 
                    ZExpE = Z[`FLEN-2:`NF]; 
                end
                2'b01: begin // double
                    XSgnE = XLen1[`D_LEN-1];
                    YSgnE = YLen1[`D_LEN-1];
                    ZSgnE = ZLen1[`D_LEN-1];

                    // the fraction is the low `D_NF bits; the exponent sits above it
                    XFracE = {XLen1[`D_NF-1:0], (`NF-`D_NF)'(0)};
                    YFracE = {YLen1[`D_NF-1:0], (`NF-`D_NF)'(0)};
                    ZFracE = {ZLen1[`D_NF-1:0], (`NF-`D_NF)'(0)};

                    XExpNonzero = |XLen1[`D_LEN-2:`D_NF]; 
                    YExpNonzero = |YLen1[`D_LEN-2:`D_NF];
                    ZExpNonzero = |ZLen1[`D_LEN-2:`D_NF];

                    XExpMaxE = &XLen1[`D_LEN-2:`D_NF];
                    YExpMaxE = &YLen1[`D_LEN-2:`D_NF];
                    ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF];

                    // re-bias: keep MSB, fill the middle bits, keep the low bits
                    XExpE = {XLen1[`D_LEN-2], {`NE-`D_NE{(~XLen1[`D_LEN-2] & XExpNonzero) | XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; 
                    YExpE = {YLen1[`D_LEN-2], {`NE-`D_NE{(~YLen1[`D_LEN-2] & YExpNonzero) | YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; 
                    ZExpE = {ZLen1[`D_LEN-2], {`NE-`D_NE{(~ZLen1[`D_LEN-2] & ZExpNonzero) | ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; 
                end
                2'b00: begin // single
                    XSgnE = XLen2[`S_LEN-1];
                    YSgnE = YLen2[`S_LEN-1];
                    ZSgnE = ZLen2[`S_LEN-1];

                    XFracE = {XLen2[`S_NF-1:0], (`NF-`S_NF)'(0)};
                    YFracE = {YLen2[`S_NF-1:0], (`NF-`S_NF)'(0)};
                    ZFracE = {ZLen2[`S_NF-1:0], (`NF-`S_NF)'(0)};

                    XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; 
                    YExpNonzero = |YLen2[`S_LEN-2:`S_NF];
                    ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF];

                    XExpMaxE = &XLen2[`S_LEN-2:`S_NF];
                    YExpMaxE = &YLen2[`S_LEN-2:`S_NF];
                    ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];

                    // re-bias: keep MSB, fill the middle bits, keep the low bits
                    XExpE = {XLen2[`S_LEN-2], {`NE-`S_NE{(~XLen2[`S_LEN-2] & XExpNonzero) | XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; 
                    YExpE = {YLen2[`S_LEN-2], {`NE-`S_NE{(~YLen2[`S_LEN-2] & YExpNonzero) | YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; 
                    ZExpE = {ZLen2[`S_LEN-2], {`NE-`S_NE{(~ZLen2[`S_LEN-2] & ZExpNonzero) | ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; 
                end
                2'b10: begin // half
                    XSgnE = XLen3[`H_LEN-1];
                    YSgnE = YLen3[`H_LEN-1];
                    ZSgnE = ZLen3[`H_LEN-1];

                    XFracE = {XLen3[`H_NF-1:0], (`NF-`H_NF)'(0)};
                    YFracE = {YLen3[`H_NF-1:0], (`NF-`H_NF)'(0)};
                    ZFracE = {ZLen3[`H_NF-1:0], (`NF-`H_NF)'(0)};

                    XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; 
                    YExpNonzero = |YLen3[`H_LEN-2:`H_NF];
                    ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF];

                    XExpMaxE = &XLen3[`H_LEN-2:`H_NF];
                    YExpMaxE = &YLen3[`H_LEN-2:`H_NF];
                    ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];

                    // re-bias: keep MSB, fill the middle bits, keep the low bits
                    XExpE = {XLen3[`H_LEN-2], {`NE-`H_NE{(~XLen3[`H_LEN-2] & XExpNonzero) | XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; 
                    YExpE = {YLen3[`H_LEN-2], {`NE-`H_NE{(~YLen3[`H_LEN-2] & YExpNonzero) | YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; 
                    ZExpE = {ZLen3[`H_LEN-2], {`NE-`H_NE{(~ZLen3[`H_LEN-2] & ZExpNonzero) | ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; 
                end
            endcase
        end
    end

    // ---- format-independent classification -------------------------------------------

    assign XExpZero = ~XExpNonzero;
    assign YExpZero = ~YExpNonzero;
    assign ZExpZero = ~ZExpNonzero;

    assign XFracZero = ~|XFracE;
    assign YFracZero = ~|YFracE;
    assign ZFracZero = ~|ZFracE;

    // mantissa = leading bit (1 whenever the exponent is nonzero) followed by the fraction
    assign XManE = {XExpNonzero, XFracE};
    assign YManE = {YExpNonzero, YFracE};
    assign ZManE = {ZExpNonzero, ZFracE};

    assign XNormE = ~(XExpMaxE|XExpZero);

    // NaN: exponent all ones and a nonzero fraction
    assign XNaNE = XExpMaxE & ~XFracZero;
    assign YNaNE = YExpMaxE & ~YFracZero;
    assign ZNaNE = ZExpMaxE & ~ZFracZero;

    // signaling NaN: NaN whose top fraction bit is clear
    assign XSNaNE = XNaNE&~XFracE[`NF-1];
    assign YSNaNE = YNaNE&~YFracE[`NF-1];
    assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];

    // denormal: exponent zero and a nonzero fraction
    assign XDenormE = XExpZero & ~XFracZero;
    assign YDenormE = YExpZero & ~YFracZero;
    assign ZDenormE = ZExpZero & ~ZFracZero;

    // infinity: exponent all ones and a zero fraction
    assign XInfE = XExpMaxE & XFracZero;
    assign YInfE = YExpMaxE & YFracZero;
    assign ZInfE = ZExpMaxE & ZFracZero;

    // zero: exponent zero and a zero fraction
    assign XZeroE = XExpZero & XFracZero;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;
 endmodule
--- a/pipelined/src/fpu/unpacking.sv
+++ b/pipelined/src/fpu/unpacking.sv
@ -1,95 +0,0 @@
 `include "wally-config.vh"
 // unpack (64-bit only version): split X, Y and Z into sign, exponent and mantissa in
 //   the double-precision layout and classify each operand.
 //   FmtE = 1 treats the operands as double precision, FmtE = 0 as single precision held
 //   in the low 32 bits.
 module unpack ( 
    input logic  [63:0] X, Y, Z,
    input logic         FmtE,
    input logic  [2:0]  FOpCtrlE,
    output logic        XSgnE, YSgnE, ZSgnE,
    output logic [10:0] XExpE, YExpE, ZExpE,
    output logic [52:0] XManE, YManE, ZManE,
    output logic XNormE,
    output logic XNaNE, YNaNE, ZNaNE,
    output logic XSNaNE, YSNaNE, ZSNaNE,
    output logic XDenormE, YDenormE, ZDenormE,
    output logic XZeroE, YZeroE, ZZeroE,
    output logic [10:0] BiasE,
    output logic XInfE, YInfE, ZInfE,
    output logic XExpMaxE
 );

    logic  [31:0]   XFloat, YFloat, ZFloat;                 // low word, or quiet NaN when the NaN box is broken
    logic [51:0]    XFracE, YFracE, ZFracE;                 // fraction in double layout
    logic           XExpNonzero, YExpNonzero, ZExpNonzero;  // source exponent has a bit set
    logic           XExpZero, YExpZero, ZExpZero;           // source exponent is all zeros
    logic           YExpMaxE, ZExpMaxE;                     // source exponent is all ones
    logic           XFracZero, YFracZero, ZFracZero;        // fraction is all zeros

    // ---- single-precision source word --------------------------------------------------
    // With F but no D there is no upper half to check; otherwise the upper bits must be
    // all ones, and a value that fails the check is replaced by the quiet NaN 0x7fc00000.
    if (`F_SUPPORTED & ~`D_SUPPORTED) begin  // eventually this should change to FLEN when FLEN isn't hardwired to 64
        assign XFloat = X[31:0]; 
        assign YFloat = Y[31:0];  
        assign ZFloat = Z[31:0]; 
    end else begin
        assign XFloat = &X[`FLEN-1:32] ? X[31:0] : 32'h7fc00000; 
        assign YFloat = &Y[`FLEN-1:32] ? Y[31:0] : 32'h7fc00000;
        assign ZFloat = &Z[`FLEN-1:32] ? Z[31:0] : 32'h7fc00000;
    end   

    // ---- exponent flags, evaluated in the operand's own format -------------------------
    assign XExpNonzero = FmtE ? |X[62:52] : |XFloat[30:23]; 
    assign XExpMaxE    = FmtE ? &X[62:52] : &XFloat[30:23];
    assign XExpZero    = ~XExpNonzero;

    assign YExpNonzero = FmtE ? |Y[62:52] : |YFloat[30:23];
    assign YExpMaxE    = FmtE ? &Y[62:52] : &YFloat[30:23];
    assign YExpZero    = ~YExpNonzero;

    assign ZExpNonzero = FmtE ? |Z[62:52] : |ZFloat[30:23];
    assign ZExpMaxE    = FmtE ? &Z[62:52] : &ZFloat[30:23];
    assign ZExpZero    = ~ZExpNonzero;

    // ---- sign --------------------------------------------------------------------------
    assign XSgnE = FmtE ? X[63] : XFloat[31];
    assign YSgnE = FmtE ? Y[63] : YFloat[31];
    assign ZSgnE = FmtE ? Z[63] : ZFloat[31];

    // ---- exponent ----------------------------------------------------------------------
    // Single exponents are re-biased to double: MSB kept, three fill bits, low seven bits
    // kept. The fill bits are 0 for a zero exponent and 1 for an all-ones exponent.
    assign XExpE = FmtE ? X[62:52] : {XFloat[30], {3{(~XFloat[30] & ~XExpZero) | XExpMaxE}}, XFloat[29:23]}; 
    assign YExpE = FmtE ? Y[62:52] : {YFloat[30], {3{(~YFloat[30] & ~YExpZero) | YExpMaxE}}, YFloat[29:23]}; 
    assign ZExpE = FmtE ? Z[62:52] : {ZFloat[30], {3{(~ZFloat[30] & ~ZExpZero) | ZExpMaxE}}, ZFloat[29:23]}; 

    // ---- fraction and mantissa ---------------------------------------------------------
    // Single fractions are left-justified and padded with 29 zeros.
    assign XFracE    = FmtE ? X[51:0] : {XFloat[22:0], 29'b0};
    assign XFracZero = ~|XFracE;
    assign XManE     = {XExpNonzero, XFracE};

    assign YFracE    = FmtE ? Y[51:0] : {YFloat[22:0], 29'b0};
    assign YFracZero = ~|YFracE;
    assign YManE     = {YExpNonzero, YFracE};

    assign ZFracE    = FmtE ? Z[51:0] : {ZFloat[22:0], 29'b0};
    assign ZFracZero = ~|ZFracE;
    assign ZManE     = {ZExpNonzero, ZFracE};

    // ---- classification ----------------------------------------------------------------
    assign XNormE = ~XExpMaxE & ~XExpZero;

    // exponent all ones: NaN if the fraction is nonzero, infinity otherwise
    assign XNaNE = XExpMaxE & ~XFracZero;
    assign XInfE = XExpMaxE & XFracZero;
    assign YNaNE = YExpMaxE & ~YFracZero;
    assign YInfE = YExpMaxE & YFracZero;
    assign ZNaNE = ZExpMaxE & ~ZFracZero;
    assign ZInfE = ZExpMaxE & ZFracZero;

    // signaling NaN: NaN with the top fraction bit clear
    assign XSNaNE = XNaNE & ~XFracE[51];
    assign YSNaNE = YNaNE & ~YFracE[51];
    assign ZSNaNE = ZNaNE & ~ZFracE[51];

    // exponent zero: denormal if the fraction is nonzero, zero otherwise
    assign XDenormE = XExpZero & ~XFracZero;
    assign XZeroE   = XExpZero & XFracZero;
    assign YDenormE = YExpZero & ~YFracZero;
    assign YZeroE   = YExpZero & YFracZero;
    assign ZDenormE = ZExpZero & ~ZFracZero;
    assign ZZeroE   = ZExpZero & ZFracZero;

    // ---- bias --------------------------------------------------------------------------
    assign BiasE = 11'h3ff; // always use 1023 because exponents are unpacked to double precision
 endmodule
--- a/pipelined/testbench/fp/tests/fma-testbench.sv
+++ b/pipelined/testbench/fp/tests/fma-testbench.sv
@ -0,0 +1,279 @@
 `include "wally-config.vh"
 `define PATH "../../../../tests/fp/vectors/"
 // Test-vector files, one row per precision, five files per precision.
 // The order matters: the testbench picks its starting index (0, 5, 10 or 15)
 // according to which precisions are supported.
 // NOTE(review): the file-name suffixes look like rounding-mode names - confirm against the vector generator.
 string tests[] = '{
    // 0-4: f16
    "f16_mulAdd_rne.tv", "f16_mulAdd_rz.tv", "f16_mulAdd_ru.tv", "f16_mulAdd_rd.tv", "f16_mulAdd_rnm.tv",
    // 5-9: f32
    "f32_mulAdd_rne.tv", "f32_mulAdd_rz.tv", "f32_mulAdd_ru.tv", "f32_mulAdd_rd.tv", "f32_mulAdd_rnm.tv",
    // 10-14: f64
    "f64_mulAdd_rne.tv", "f64_mulAdd_rz.tv", "f64_mulAdd_ru.tv", "f64_mulAdd_rd.tv", "f64_mulAdd_rnm.tv",
    // 15-19: f128
    "f128_mulAdd_rne.tv", "f128_mulAdd_rz.tv", "f128_mulAdd_ru.tv", "f128_mulAdd_rd.tv", "f128_mulAdd_rnm.tv"
 };
 // steps to run FMA tests
 //    1) create test vectors in riscv-wally/tests/fp with: ./run-all.sh
 //    2) go to riscv-wally/pipelined/testbench/fp/tests
 //    3) run ./sim-wally-batch
 module fmatestbench();
  logic clk;
  logic [31:0] errors=0;
  logic [31:0] vectornum=0;
  logic [`FLEN*4+7+4+4:0] testvectors[6133248:0];
  int i = `ZFH_SUPPORTED ? 0 : `F_SUPPORTED ? 5 : `D_SUPPORTED ? 10 : 15; // set i to the first test that is run
  logic [`FLEN-1:0]     X, Y, Z;  // inputs read from TestFloat
  logic [`FLEN-1:0]	    ans;      // result from TestFloat
  logic [7:0]	 	        flags;    // flags read form testfloat
  logic [2:0]		        FrmE;     // rounding mode
  logic	[`FPSIZES/3:0]  FmtE;     // format - 10 = half, 00 = single, 01 = double, 11 = quad
  logic [3:0]		        FrmRead;  // rounding mode read from testfloat
  logic	[3:0]			      FmtRead;  // format read from testfloat
  logic [`FLEN-1:0]     FMAResM;  // FMA's outputed result
  logic [4:0]           FMAFlgM;  // FMA's outputed flags
  logic [2:0]		        FOpCtrlE; // which opperation
  logic                 wnan;     // is the outputed result NaN
  logic                 ansnan;   // is the correct answer NaN
  // signals needed to connect modules
  logic [`NE+1:0]	  ProdExpE;
  logic 				    AddendStickyE;
  logic 					  KillProdE; 
  logic             XSgnE, YSgnE, ZSgnE;
  logic [`NE-1:0]   XExpE, YExpE, ZExpE;
  logic [`NF:0]     XManE, YManE, ZManE;
  logic             XNormE;
  logic             XExpMaxE;
  logic             XNaNE, YNaNE, ZNaNE;
  logic             XSNaNE, YSNaNE, ZSNaNE;
  logic             XDenormE, YDenormE, ZDenormE;
  logic             XInfE, YInfE, ZInfE;
  logic             XZeroE, YZeroE, ZZeroE;
  logic             YExpMaxE, ZExpMaxE, Mult;
  logic [3*`NF+5:0]	SumE;       
  logic 			      InvZE;
  logic 			      NegSumE;
  logic 			      ZSgnEffE;
  logic 			      PSgnE;
  logic [$clog2(3*`NF+7)-1:0]	NormCntE;
  assign FOpCtrlE = 3'b0; // set to 0 because test float only tests fMADD
  assign Mult = 1'b0;     // set to zero because not testing multiplication
  // check if the calculated result or correct answer is NaN
  always_comb begin
    case (FmtRead)
        4'b11: begin // quad             
          assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
          assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
        end
        4'b01: begin // double                 
          assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
          assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
        end
        4'b00: begin // single
            assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
            assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
        end
        4'b10: begin // half
            assign ansnan = &ans[`H_LEN-2:`H_NF]&(|ans[`H_NF-1:0]);
            assign wnan = &FMAResM[`H_LEN-2:`H_NF]&(|FMAResM[`H_NF-1:0]);
        end
    endcase
  end
  // instantiate devices under test
  unpack unpack(.X, .Y, .Z, .FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
                .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
                .XExpMaxE);
  fma1 fma1(.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
            .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
            .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
            .ProdExpE, .AddendStickyE, .KillProdE); 
  fma2 fma2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE), 
            .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), 
            .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), 
            .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(FmtE), .FrmM(FrmE), 
            .FMAFlgM, .FMAResM, .Mult);
  // produce clock
  always begin
    clk = 1; #5; clk = 0; #5;
  end
  // Read first test
  initial begin
      $display("\n\nRunning %s vectors", tests[i]);
      $readmemh({`PATH, tests[i]}, testvectors);
  end
  // apply test vectors on rising edge of clk
  always @(posedge clk) begin
    #1; 
    flags = testvectors[vectornum][15:8];
    FrmRead = testvectors[vectornum][7:4];
    FmtRead = testvectors[vectornum][3:0];
    if (FmtRead==4'b11 & `Q_SUPPORTED) 	begin       // quad
      X = testvectors[vectornum][16+4*(`Q_LEN)-1:16+3*(`Q_LEN)];
      Y = testvectors[vectornum][16+3*(`Q_LEN)-1:16+2*(`Q_LEN)];
      Z = testvectors[vectornum][16+2*(`Q_LEN)-1:16+`Q_LEN];
      ans = testvectors[vectornum][16+(`Q_LEN-1):16];
    end
    else if (FmtRead==4'b01 & `D_SUPPORTED)	begin	  // double
      X = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+4*(`D_LEN)-1:16+3*(`D_LEN)]};
      Y = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+3*(`D_LEN)-1:16+2*(`D_LEN)]};
      Z = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+2*(`D_LEN)-1:16+`D_LEN]};
      ans = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+(`D_LEN-1):16]};
    end
    else if (FmtRead==4'b00 & `F_SUPPORTED)	begin	  // single
      X = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+4*(`S_LEN)-1:16+3*(`S_LEN)]};
      Y = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+3*(`S_LEN)-1:16+2*(`S_LEN)]};
      Z = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+2*(`S_LEN)-1:16+`S_LEN]};
      ans = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+(`S_LEN-1):16]};
    end
    else if (FmtRead==4'b10 & `ZFH_SUPPORTED)	begin	  // half
      X = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+4*(`H_LEN)-1:16+3*(`H_LEN)]};
      Y = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+3*(`H_LEN)-1:16+2*(`H_LEN)]};
      Z = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+2*(`H_LEN)-1:16+`H_LEN]};
      ans = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+(`H_LEN-1):16]};
    end
    else begin	  
      X = {`FLEN{1'bx}};
      Y = {`FLEN{1'bx}};
      Z = {`FLEN{1'bx}};
      ans = {`FLEN{1'bx}};
    end
    // trim format and rounding mode to appropriate size
    if (`FPSIZES <= 2) FmtE = FmtRead === `FMT; // rewrite format if 2 or less floating formats are supported
    else FmtE = FmtRead[1:0];
    FrmE = FrmRead[2:0];
  end
  // check results on falling edge of clk
    always @(negedge clk) begin
      // quad
        if((FmtRead==4'b11) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0] | (XNaNE&(FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) | (YNaNE&(FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) | (ZNaNE&(FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})))))) begin
          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(XDenormE) $display( "xdenorm ");
          if(YDenormE) $display( "ydenorm ");
          if(ZDenormE) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf ");
          if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
          if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
          if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
          errors = errors + 1;
          if (errors === 1) $stop;
        end
      // double
        if((FmtRead==4'b01) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`D_LEN-2:0] === ans[`D_LEN-2:0] | (XNaNE&(FMAResM[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | (YNaNE&(FMAResM[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})) | (ZNaNE&(FMAResM[`D_LEN-2:0] === {Z[`D_LEN-2:`D_NF],1'b1,Z[`D_NF-2:0]})))))) begin
          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
          errors = errors + 1;
          if (errors === 1) $stop;
        end
      // single
        if((FmtRead==4'b00) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`S_LEN-2:0] === ans[`S_LEN-2:0] | (XNaNE&(FMAResM[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | (YNaNE&(FMAResM[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})) | (ZNaNE&(FMAResM[`S_LEN-2:0] === {Z[`S_LEN-2:`S_NF],1'b1,Z[`S_NF-2:0]})))))) begin
          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
          errors = errors + 1;
          if (errors === 1) $stop;
        end
      // half
        if((FmtRead==4'b01) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`H_LEN-2:0] === ans[`H_LEN-2:0] | (XNaNE&(FMAResM[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | (YNaNE&(FMAResM[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})) | (ZNaNE&(FMAResM[`H_LEN-2:0] === {Z[`H_LEN-2:`H_NF],1'b1,Z[`H_NF-2:0]})))))) begin
          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
          if(FMAFlgM[4] !== 0) $display( "invld ");
          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
          if(FMAFlgM[1] !== 0) $display( "unflw ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
          errors = errors + 1;
          if (errors === 1) $stop;
        end
 	    // if ( vectornum === 3165862) $stop; // uncomment for specific test
      vectornum = vectornum + 1; // increment test
      if (testvectors[vectornum][0] === 1'bx) begin // if reached the end of file
        if (errors) begin // if there were errors
          $display("%s completed with %d tests and %d errors", tests[i], vectornum, errors);
          $stop;
        end
        else begin // if no errors
          if(tests[i] === "") begin // if no more tests
            $display("\nAll tests completed with %d errors\n", errors);
            $stop;
          end
          $display("%s completed successfully with %d tests and %d errors (across all tests)\n", tests[i], vectornum, errors);
          // increment tests - skip some precisions if needed
          if ((i === 4 & ~`F_SUPPORTED) | (i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5;
          if ((i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5;
          if ((i === 14 & ~`Q_SUPPORTED)) i = i+5;
          i = i+1;
          // if no more tests - finish
          if(tests[i] === "") begin
            $display("\nAll tests completed with %d errors\n", errors);
            $stop;
          end 
          // read next files
          $display("Running %s vectors", tests[i]);
          $readmemh({`PATH, tests[i]}, testvectors);
          vectornum = 0;
        end
      end
  end
 endmodule
--- a/pipelined/testbench/fp/tests/fma.do
+++ b/pipelined/testbench/fp/tests/fma.do
@ -0,0 +1,50 @@
 # fma.do
 #
 # ModelSim script that compiles and runs the FMA testbench (fma-testbench.sv).
 # Derived from wally-pipelined.do.
 #
 # Modification by Oklahoma State University & Harvey Mudd College
 # Use with Testbench 
 # James Stine, 2008; David Harris 2021
 # Go Cowboys!!!!!!
 #
 # Usage: pass the configuration directory name as the first argument ($1).
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do fma.do rv64fp
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -c -do "do fma.do rv64fp"
 # (omit the "-c" to see the GUI while running from the shell)
 # keep executing this script when the simulation breaks (the testbench calls $stop)
 onbreak {resume}
 # create library (delete any existing one first so the build starts clean)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files
 # suppress spurious warnings about 
 # "Extra checking for conflicts with always_comb done at vopt time"
 # because vsim will run vopt
 # $1 = first word after the script name in the "do" call; it selects the
 # directory under ../../../config/ that is added to the include path
 vlog +incdir+../../../config/$1 +incdir+../../../config/shared fma-testbench.sv ../../../src/fpu/fma.sv ../../../src/fpu/unpack.sv -suppress 2583 -suppress 7063
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 vsim -voptargs=+acc work.fmatestbench
 view wave
 #-- display input and output signals as hexadecimal values
 #do ./wave-dos/peripheral-waves.do
 #add log -recursive /*
 #do wave.do deal with when ready
 #-- Run the Simulation 
 #run 3600 
 run -all
 # close the testbench source window and bring the wave window back up
 noview fma-testbench.sv
 view wave
--- a/pipelined/testbench/fp/tests/sim-fma
+++ b/pipelined/testbench/fp/tests/sim-fma
@ -0,0 +1 @@
 # Run the FMA testbench in ModelSim with the GUI; "rv64fp" is handed to fma.do as $1 (the config directory name).
 vsim -do "do fma.do rv64fp"
--- a/pipelined/testbench/fp/tests/sim-fma-batch
+++ b/pipelined/testbench/fp/tests/sim-fma-batch
@ -0,0 +1 @@
 # Run the FMA testbench in ModelSim from the command line (-c, no GUI); "rv64fp" is handed to fma.do as $1 (the config directory name).
 vsim -c -do "do fma.do rv64fp"
--- a/tests/fp/create_vectors128fma.sh
+++ b/tests/fp/create_vectors128fma.sh
@ -0,0 +1,31 @@
 #!/bin/sh
 # Generate TestFloat f128_mulAdd (fused multiply-add) test vectors, one file
 # per rounding mode, and tag every vector with its rounding-mode (Frm) and
 # format (Fmt) codes.
 #
 # Output line format: X_Y_Z_answer_flags_Frm_Fmt
 # Paths are relative: run from the directory that contains TestFloat-3e/.
 # Requires a sed that supports in-place editing (-i), e.g. GNU sed.
 set -eu  # stop on a failed generator run instead of post-processing bad files

 BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
 OUTPUT="./vectors"
 FMT=3  # Fmt code appended to every f128 vector

 # gen_vectors ROUNDING TAG FRM
 #   ROUNDING  testfloat_gen rounding option without the leading "-r"
 #   TAG       rounding-mode tag used in the output file name
 #   FRM       Frm code appended to every vector in that file
 gen_vectors() {
   tv_file="${OUTPUT}/f128_mulAdd_${2}.tv"
   "${BUILD}/testfloat_gen" "-r${1}" f128_mulAdd > "${tv_file}"
   # Use "-i -e", not "-ie": sed reads "-ie" as in-place editing with backup
   # suffix "e", which left a stray *.tve copy next to every vector file.
   sed -i -e 's/ /_/g' -e "s/\$/_${3}_${FMT}/" "${tv_file}"
 }

 mkdir -p "${OUTPUT}"
 gen_vectors near_even   rne 0
 gen_vectors minMag      rz  1
 gen_vectors max         ru  3
 gen_vectors min         rd  2
 gen_vectors near_maxMag rnm 4
--- a/tests/fp/create_vectors16fma.sh
+++ b/tests/fp/create_vectors16fma.sh
@ -0,0 +1,31 @@
 #!/bin/sh
 # Generate TestFloat f16_mulAdd (fused multiply-add) test vectors, one file
 # per rounding mode, and tag every vector with its rounding-mode (Frm) and
 # format (Fmt) codes.
 #
 # Output line format: X_Y_Z_answer_flags_Frm_Fmt
 # Paths are relative: run from the directory that contains TestFloat-3e/.
 # Requires a sed that supports in-place editing (-i), e.g. GNU sed.
 set -eu  # stop on a failed generator run instead of post-processing bad files

 BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
 OUTPUT="./vectors"
 FMT=2  # Fmt code appended to every f16 vector

 # gen_vectors ROUNDING TAG FRM
 #   ROUNDING  testfloat_gen rounding option without the leading "-r"
 #   TAG       rounding-mode tag used in the output file name
 #   FRM       Frm code appended to every vector in that file
 gen_vectors() {
   tv_file="${OUTPUT}/f16_mulAdd_${2}.tv"
   "${BUILD}/testfloat_gen" "-r${1}" f16_mulAdd > "${tv_file}"
   # Use "-i -e", not "-ie": sed reads "-ie" as in-place editing with backup
   # suffix "e", which left a stray *.tve copy next to every vector file.
   sed -i -e 's/ /_/g' -e "s/\$/_${3}_${FMT}/" "${tv_file}"
 }

 mkdir -p "${OUTPUT}"
 gen_vectors near_even   rne 0
 gen_vectors minMag      rz  1
 gen_vectors max         ru  3
 gen_vectors min         rd  2
 gen_vectors near_maxMag rnm 4
--- a/tests/fp/create_vectors32fma.sh
+++ b/tests/fp/create_vectors32fma.sh
@ -0,0 +1,31 @@
 #!/bin/sh
 # Generate TestFloat f32_mulAdd (fused multiply-add) test vectors, one file
 # per rounding mode, and tag every vector with its rounding-mode (Frm) and
 # format (Fmt) codes.
 #
 # Output line format: X_Y_Z_answer_flags_Frm_Fmt
 # Paths are relative: run from the directory that contains TestFloat-3e/.
 # Requires a sed that supports in-place editing (-i), e.g. GNU sed.
 set -eu  # stop on a failed generator run instead of post-processing bad files

 BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
 OUTPUT="./vectors"
 FMT=0  # Fmt code appended to every f32 vector

 # gen_vectors ROUNDING TAG FRM
 #   ROUNDING  testfloat_gen rounding option without the leading "-r"
 #   TAG       rounding-mode tag used in the output file name
 #   FRM       Frm code appended to every vector in that file
 gen_vectors() {
   tv_file="${OUTPUT}/f32_mulAdd_${2}.tv"
   "${BUILD}/testfloat_gen" "-r${1}" f32_mulAdd > "${tv_file}"
   # Use "-i -e", not "-ie": sed reads "-ie" as in-place editing with backup
   # suffix "e", which left a stray *.tve copy next to every vector file.
   sed -i -e 's/ /_/g' -e "s/\$/_${3}_${FMT}/" "${tv_file}"
 }

 mkdir -p "${OUTPUT}"
 gen_vectors near_even   rne 0
 gen_vectors minMag      rz  1
 gen_vectors max         ru  3
 gen_vectors min         rd  2
 gen_vectors near_maxMag rnm 4
--- a/tests/fp/create_vectors64fma.sh
+++ b/tests/fp/create_vectors64fma.sh
@ -0,0 +1,31 @@
 #!/bin/sh
 # Generate TestFloat f64_mulAdd (fused multiply-add) test vectors, one file
 # per rounding mode, and tag every vector with its rounding-mode (Frm) and
 # format (Fmt) codes.
 #
 # Output line format: X_Y_Z_answer_flags_Frm_Fmt
 # Paths are relative: run from the directory that contains TestFloat-3e/.
 # Requires a sed that supports in-place editing (-i), e.g. GNU sed.
 set -eu  # stop on a failed generator run instead of post-processing bad files

 BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
 OUTPUT="./vectors"
 FMT=1  # Fmt code appended to every f64 vector

 # gen_vectors ROUNDING TAG FRM
 #   ROUNDING  testfloat_gen rounding option without the leading "-r"
 #   TAG       rounding-mode tag used in the output file name
 #   FRM       Frm code appended to every vector in that file
 gen_vectors() {
   tv_file="${OUTPUT}/f64_mulAdd_${2}.tv"
   "${BUILD}/testfloat_gen" "-r${1}" f64_mulAdd > "${tv_file}"
   # Use "-i -e", not "-ie": sed reads "-ie" as in-place editing with backup
   # suffix "e", which left a stray *.tve copy next to every vector file.
   sed -i -e 's/ /_/g' -e "s/\$/_${3}_${FMT}/" "${tv_file}"
 }

 mkdir -p "${OUTPUT}"
 gen_vectors near_even   rne 0
 gen_vectors minMag      rz  1
 gen_vectors max         ru  3
 gen_vectors min         rd  2
 gen_vectors near_maxMag rnm 4
--- a/tests/fp/run_all.sh
+++ b/tests/fp/run_all.sh
@ -8,3 +8,7 @@
 ./create_vectors64cmp.sh
 ./create_vectors64.sh
 ./create_vectorsi.sh
 ./create_vectors16fma.sh
 ./create_vectors32fma.sh
 ./create_vectors64fma.sh
 ./create_vectors128fma.sh
		`@ -1 +1 @@`
			`Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86`				`Subproject commit be67c99bd461742aa1c100bcc0732657faae2230`
`@ -1,3 +1,3 @@`
	`testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat`	`testfloat_gen f128_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat`
	`tr -d ' ' < testFloat > testFloatNoSpace`	`tr -d ' ' < testFloat > testFloatNoSpace`