diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
index cd3e2a4d..bf09314e 100644
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@@ -48,7 +48,7 @@ assign FOpCtrlE = 3'b0;
 // up - 011
 // nearest max mag - 100  
 assign FrmE = 3'b000;
-assign FmtE = 1'b0;
+assign FmtE = 1'b1;
 
     logic  [`FLEN-1:0] X, Y, Z;
     // logic         FmtE;
@@ -76,9 +76,9 @@ assign FmtE = 1'b0;
     assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31];
     assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31];
 
-    assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
-    assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
-    assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]};//{Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; 
+    assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
+    assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
+    assign ZExpE = FmtE ? Addend[62:52] : {Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; 
 
     assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0};
     assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0};
@@ -122,7 +122,7 @@ assign FmtE = 1'b0;
     assign YZeroE = YExpZero & YFracZero;
     assign ZZeroE = ZExpZero & ZFracZero;
 
-    assign BiasE = FmtE ? {1'b0, {`NE-1{1'b1}}} : 13'h7f;
+    assign BiasE = 13'h3ff; // 1023, the double-precision bias; valid for both formats because single-precision exponents are widened to the 11-bit double format where XExpE/YExpE/ZExpE are assigned
 
 assign	wnan = FmtE ? &FMAResM[`FLEN-2:`NF] && |FMAResM[`NF-1:0] : &FMAResM[30:23] && |FMAResM[22:0]; 
 // assign	XNaNE = FmtE ? &X[62:52] && |X[51:0] : &X[62:55] && |X[54:32]; 
@@ -203,7 +203,7 @@ always @(posedge clk)
 		if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
 		if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
         errors = errors + 1;
-	  //if (errors == 10)
+	  if (errors == 10)
 		$stop;
     end
  vectornum = vectornum + 1;
diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
index c7cf5f09..0741e9d6 100755
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
@@ -1,3 +1,3 @@
-testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
+testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace
 
diff --git a/wally-pipelined/src/fpu/convert_inputs.sv b/wally-pipelined/src/fpu/convert_inputs.sv
index 628519a7..bf56cb00 100755
--- a/wally-pipelined/src/fpu/convert_inputs.sv
+++ b/wally-pipelined/src/fpu/convert_inputs.sv
@@ -8,7 +8,7 @@
 module convert_inputs(
    input [63:0]  op1,      // 1st input operand (A)
    input [63:0]  op2,      // 2nd input operand (B)
-   input [3:0]   op_type,  // Function opcode
+   input [2:0]   op_type,  // Function opcode
    input 	     P,        // Result Precision (0 for double, 1 for single)
 
    output [63:0] Float1,	// Converted 1st input operand
@@ -16,8 +16,6 @@ module convert_inputs(
 );
 
    wire 	 conv_SP;   // Convert from SP to DP
-   wire 	 negate;    // Operation is negation
-   wire 	 abs_val;   // Operation is absolute value
    wire 	 Zexp1;		// One if the exponent of op1 is zero
    wire 	 Zexp2;		// One if the exponent of op2 is zero
    wire 	 Oexp1;		// One if the exponent of op1 is all ones
@@ -25,7 +23,7 @@ module convert_inputs(
 
    // Convert from single precision to double precision if (op_type is 11X
    // and P is 0) or (op_type is not 11X and P is one). 
-   assign conv_SP = (op_type[2]&op_type[1]) ^ P;
+   assign conv_SP = ~P; // NOTE(review): no longer depends on op_type. The port comment says P=1 means single, which would make this convert only when P selects double - confirm P's polarity at the instantiation
 
    // Test if the input exponent is zero, because if it is then the
    // exponent of the converted number should be zero. 
@@ -40,17 +38,14 @@ module convert_inputs(
    assign Float1[28:0] = op1[28:0] & {29{~conv_SP}};
 
    // Conditionally convert op2. Lower 29 bits are zero for single precision. 
-   assign Float2[62:29] = conv_SP ? {op2[30], 
-				     {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
+   assign Float2[62:29] = conv_SP ? {op2[30], {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
 			  : op2[62:29];
    assign Float2[28:0] = op2[28:0] & {29{~conv_SP}};
 
    // Set the sign of Float1 based on its original sign and if the operation
    // is negation (op_type = 101) or absolute value (op_type = 100)
 
-   assign negate  = op_type[2] & ~op_type[1] & op_type[0];
-   assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0]; //*** remove abs_val
-   assign Float1[63]  = conv_SP ? (op1[31] ^ negate) & ~abs_val : (op1[63] ^ negate) & ~abs_val;
+   assign Float1[63]  = conv_SP ? op1[31] : op1[63]; // sign is passed through unchanged; negation / absolute-value handling is no longer applied here
    assign Float2[63]  = conv_SP ? op2[31] : op2[63];
 
 endmodule // convert_inputs
diff --git a/wally-pipelined/src/fpu/exception.sv b/wally-pipelined/src/fpu/exception.sv
index c24586a1..bccfa01f 100755
--- a/wally-pipelined/src/fpu/exception.sv
+++ b/wally-pipelined/src/fpu/exception.sv
@@ -1,95 +1,58 @@
 // Exception logic for the floating point adder. Note: We may 
 // actually want to move to where the result is computed.
 
-module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
+module exception (
 
-   input [63:0] A;		// 1st input operand (op1)
-   input [63:0] B;		// 2nd input operand (op2)
-   input [3:0] 	op_type;   	// Function opcode
-   output [3:0] Ztype;		// Indicates type of result (Z)
-   output 	Invalid;	// Invalid operation exception
-   output 	Denorm;		// Denormalized input
-   output       ANorm;          // A is not zero or Denorm
-   output       BNorm;          // B is not zero or Denorm
-   output       Sub;		// The effective operation is subtraction
-   wire		AzeroM;	 	// '1' if the mantissa of A is zero
-   wire		BzeroM;		// '1' if the mantissa of B is zero
-   wire		AzeroE;	 	// '1' if the exponent of A is zero
-   wire		BzeroE;		// '1' if the exponent of B is zero
-   wire		AonesE;	 	// '1' if the exponent of A is all ones
-   wire		BonesE;		// '1' if the exponent of B is all ones
-   wire		ADenorm; 	// '1' if A is a denomalized number
-   wire		BDenorm; 	// '1' if B is a denomalized number
-   wire		AInf;	 	// '1' if A is infinite
-   wire		BInf;	 	// '1' if B is infinite
-   wire		AZero;	 	// '1' if A is 0
-   wire		BZero;	 	// '1' if B is 0
-   wire		ANaN;	 	// '1' if A is a not-a-number
-   wire		BNaN; 		// '1' if B is a not-a-number
-   wire		ASNaN;	 	// '1' if A is a signalling not-a-number
-   wire		BSNaN;	 	// '1' if B is a signalling not-a-number
+   input logic [2:0] 	op_type,   	// Function opcode
+   input logic XSgnE, YSgnE, // signs of operands X and Y
+   // input logic [52:0] XManE, YManE,
+   input logic XDenormE, YDenormE, // '1' if X / Y is a denormalized number
+   input logic XNormE, YNormE, // '1' if X / Y is normalized (presumably nonzero exponent - confirm at the driver)
+   input logic XZeroE, YZeroE, // '1' if X / Y is zero
+   input logic XInfE, YInfE, // '1' if X / Y is infinite
+   input logic XNaNE, YNaNE, // '1' if X / Y is a not-a-number
+   input logic XSNaNE, YSNaNE, // '1' if X / Y is a signalling not-a-number
+   output logic [3:0] Ztype,		// Indicates type of result (Z)
+   output logic 	Invalid,	// Invalid operation exception
+   output logic 	Denorm,		// Denormalized input
+   output logic       Sub		// The effective operation is subtraction
+);
    wire		ZQNaN;	 	// '1' if result Z is a quiet NaN
    wire		ZPInf;	 	// '1' if result Z positive infnity
    wire		ZNInf;	 	// '1' if result Z negative infnity
    wire         add_sub;	// '1' if operation is add or subtract
    wire 	converts;       // See if there are any converts   
    
-   parameter [51:0]  fifty_two_zeros = 52'h0000000000000; // Use parameter?
 
 
    // Is this instruction a convert
-   assign converts      = ~(~op_type[1] & ~op_type[2]);
+   assign converts      = op_type[1];
    
-   // Determine if mantissas are all zeros
-   assign AzeroM = (A[51:0] == fifty_two_zeros);
-   assign BzeroM = (B[51:0] == fifty_two_zeros);
 
-   // Determine if exponents are all ones or all zeros 
-   assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
-   assign BonesE = B[62]&B[61]&B[60]&B[59]&B[58]&B[57]&B[56]&B[55]&B[54]&B[53]&B[52];
-   assign AzeroE = ~(A[62]|A[61]|A[60]|A[59]|A[58]|A[57]|A[56]|A[55]|A[54]|A[53]|A[52]);
-   assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]);
-
-   // Determine special cases. Note: Zero is not really a special case. 
-   assign ADenorm = AzeroE & ~AzeroM;
-   assign BDenorm = BzeroE & ~BzeroM;
-   assign AInf = AonesE & AzeroM;
-   assign BInf = BonesE & BzeroM;
-   assign ANaN = AonesE & ~AzeroM;
-   assign BNaN = BonesE & ~BzeroM;
-   assign ASNaN = ANaN & ~A[51];
-   assign BSNaN = BNaN & ~B[51];
-   assign AZero = AzeroE & AzeroM;
-   assign BZero = BzeroE & BzeroE;
-
-   // A and B are normalized if their exponents are not zero. 
-   assign ANorm = ~AzeroE;
-   assign BNorm = ~BzeroE;
 
    // An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
    // or (A and B are both Infinite and the "effective operation" is 
    // subtraction). 
-   assign add_sub = ~op_type[2] & ~op_type[1];
-   assign Invalid = (ASNaN | BSNaN | 
-		     (add_sub & AInf & BInf & (A[63]^B[63]^op_type[0]))) & ~converts;
+   assign add_sub = ~op_type[1];
+   assign Invalid = (XSNaNE | YSNaNE | (add_sub & XInfE & YInfE & (XSgnE^YSgnE^op_type[0]))) & ~converts;
 
    // The Denorm flag is set if (A is denormlized and the operation is not integer 
    // conversion ) or (if B is normalized and the operation is addition or  subtraction). 
-   assign Denorm = ADenorm&(op_type[2]|~op_type[1]) | BDenorm & add_sub;
+   assign Denorm = XDenormE | YDenormE & add_sub;
 
    // The result is a quiet NaN if (an "Invalid Operation" exception occurs) 
    // or (A is a NaN) or (B is a NaN and the operation uses B).
-   assign ZQNaN = Invalid | ANaN | (BNaN & add_sub);
+   assign ZQNaN = Invalid | XNaNE | (YNaNE & add_sub);
 
    // The result is +Inf if ((A is +Inf) or (B is -Inf and the operation is
    // subtraction) or (B is +Inf and the operation is addition)) and (the
    // result is not a quiet NaN).  
-   assign ZPInf = (AInf&A[63] | add_sub&BInf&(~B[63]^op_type[0]))&~ZQNaN;
+   assign ZPInf = (XInfE&XSgnE | add_sub&YInfE&(~YSgnE^op_type[0]))&~ZQNaN;
 
    // The result is -Inf if ((A is -Inf) or (B is +Inf and the operation is
    // subtraction) or (B is -Inf and the operation is addition)) and the
    // result is not a quiet NaN.  
-   assign ZNInf = (AInf&~A[63] | add_sub&BInf&(B[63]^op_type[0]))&~ZQNaN;
+   assign ZNInf = (XInfE&~XSgnE | add_sub&YInfE&(YSgnE^op_type[0]))&~ZQNaN;
 
    // Set the type of the result as follows:
    // (needs optimization - got lazy or was late)
@@ -102,19 +65,19 @@ module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
    //  0101     +Bzero and -Azero (and vice-versa)
    //  1000     Convert SP to DP (and vice-versa)
 
-   assign Ztype[0] = ((ZQNaN | ZPInf) & ~(~op_type[2] & op_type[1])) | 
-		     ((AZero & BZero & (A[63]^B[63]^op_type[0])) 
+   assign Ztype[0] = (ZQNaN | ZPInf) | 
+		     ((XZeroE & YZeroE & (XSgnE^YSgnE^op_type[0])) 
 		      & ~converts);
-   assign Ztype[1] = ((ZNInf | ZPInf) & ~(~op_type[2] & op_type[1])) | 
-		     (((AZero & BZero & A[63] & B[63] & ~op_type[0]) |
-		       (AZero & BZero & A[63] & ~B[63] & op_type[0])) 
+   assign Ztype[1] = (ZNInf | ZPInf) | 
+		     (((XZeroE & YZeroE & XSgnE & YSgnE & ~op_type[0]) |
+		       (XZeroE & YZeroE & XSgnE & ~YSgnE & op_type[0])) 
 		      & ~converts);
-   assign Ztype[2] = ((AZero & BZero & ~op_type[1] & ~op_type[2]) 
+   assign Ztype[2] = ((XZeroE & YZeroE & ~op_type[1]) 
 		      & ~converts);
-   assign Ztype[3] = (op_type[1] & op_type[2] & ~op_type[0]);
+   assign Ztype[3] = (op_type[1] & ~op_type[0]);
 
    // Determine if the effective operation is subtraction
-   assign Sub = ~(op_type[3] & ~op_type[0]) & ( (op_type[3] & op_type[0]) | (add_sub & (A[63]^B[63]^op_type[0])) );
+   assign Sub = add_sub & (XSgnE^YSgnE^op_type[0]);
  
 endmodule // exception
 
diff --git a/wally-pipelined/src/fpu/exception_div.sv b/wally-pipelined/src/fpu/exception_div.sv
index e917f127..37432068 100755
--- a/wally-pipelined/src/fpu/exception_div.sv
+++ b/wally-pipelined/src/fpu/exception_div.sv
@@ -27,7 +27,7 @@ module exception_div (
    logic 	      ZInf;	 	// '1' if result Z is an infnity
    logic 	      Zero;             // '1' if result is zero   
    
-
+   //*** TODO: take this module out and either add more registers or just recalculate it all
    // Determine if mantissas are all zeros
    assign AzeroM = (A[51:0] == 52'h0);
    assign BzeroM = (B[51:0] == 52'h0);
diff --git a/wally-pipelined/src/fpu/faddcvt.sv b/wally-pipelined/src/fpu/faddcvt.sv
index a604f887..e09deae6 100755
--- a/wally-pipelined/src/fpu/faddcvt.sv
+++ b/wally-pipelined/src/fpu/faddcvt.sv
@@ -33,9 +33,22 @@ module faddcvt(
    input logic          StallM,     // stall the memory stage
    input logic  [63:0]  FSrcXE,		// 1st input operand (A)
    input logic  [63:0]  FSrcYE,		// 2nd input operand (B)
-   input logic  [3:0]   FOpCtrlE, FOpCtrlM,	// Function opcode
+   input logic  [2:0]   FOpCtrlE, FOpCtrlM,	// Function opcode
    input logic          FmtE, FmtM,   	// Result Precision (0 for double, 1 for single)
    input logic  [2:0] 	FrmM,		      // Rounding mode - specify values 
+   input logic XSgnE, YSgnE, // signs of X and Y (E stage)
+   input logic [52:0] XManE, YManE, // mantissas of X and Y (E stage)
+   input logic [10:0] XExpE, YExpE, // exponents of X and Y (E stage)
+   input logic XSgnM, YSgnM, // signs of X and Y (M stage)
+   input logic [52:0] XManM, YManM, // mantissas of X and Y (M stage)
+   input logic [10:0] XExpM, YExpM, // exponents of X and Y (M stage)
+   input logic XDenormE, YDenormE, // '1' if X / Y is a denormalized number
+   input logic XNormE, YNormE, // '1' if X / Y is normalized (E stage)
+   input logic XNormM, YNormM, // '1' if X / Y is normalized (M stage)
+   input logic XZeroE, YZeroE, // '1' if X / Y is zero
+   input logic XInfE, YInfE, // '1' if X / Y is infinite
+   input logic XNaNE, YNaNE, // '1' if X / Y is a not-a-number
+   input logic XSNaNE, YSNaNE, // '1' if X / Y is a signalling not-a-number
    output logic [63:0]  FAddResM,	   // Result of operation
    output logic [4:0]   FAddFlgM);   	// IEEE exception flags 
    
@@ -44,53 +57,53 @@ module faddcvt(
    logic [3:0] 	AddSelInvE, AddSelInvM;
    logic [10:0] 	AddExpPostSumE,AddExpPostSumM;
    logic 		   AddCorrSignE, AddCorrSignM;
-   logic          AddOp1NormE, AddOp1NormM;
-   logic          AddOp2NormE, AddOp2NormM;
    logic          AddOpANormE,  AddOpANormM;
    logic          AddOpBNormE, AddOpBNormM;
    logic          AddInvalidE, AddInvalidM;
    logic 		   AddDenormInE, AddDenormInM;
    logic          AddSwapE, AddSwapM;
    logic          AddSignAE, AddSignAM;
-   logic 		   AddConvertE, AddConvertM;
-   logic [63:0] 	AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
    logic [11:0] 	AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
    logic [10:0] 	AddExponentE, AddExponentM;
 
 
-   fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, 
+   fpuaddcvt1 fpadd1 (.FOpCtrlE, .FmtE, .AddExponentE, 
                      .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, 
-                     .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, 
-                     .AddDenormInE, .AddConvertE, .AddSwapE);
+   .XSgnE, .YSgnE,.XManE, .YManE, .XExpE, .YExpE,  .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
+                     .AddCorrSignE, .AddSignAE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, 
+                     .AddDenormInE, .AddSwapE);
 
    // E/M pipeline registers
    flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); 
    flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); 
    flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); 
-   flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); 
-   flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); 
    flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); 
    flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); 
    flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
-   flopenrc #(14) EMRegAdd9(clk, reset, FlushM, ~StallM, 
-                           {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddSignAE},
-                           {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM}); 
+   flopenrc #(11) EMRegAdd9(clk, reset, FlushM, ~StallM, 
+                           {AddSelInvE, AddCorrSignE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddSwapE, AddSignAE},
+                           {AddSelInvM, AddCorrSignM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddSwapM, AddSignAM}); 
 
                      
-   fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, 
-                     .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, 
-                     .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, 
-                     .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
+   fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM,  .XNormM, .YNormM, 
+                     .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
+                     .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, 
+                     .AddSignAM, .AddCorrSignM, .AddSwapM, .FAddResM, .FAddFlgM);
 endmodule
 
 module fpuaddcvt1 (
-   input logic [63:0]   FSrcXE,		// 1st input operand (A)
-   input logic [63:0]   FSrcYE,		// 2nd input operand (B)
-   input logic [3:0]	   FOpCtrlE,	// Function opcode
+   input logic [2:0]	   FOpCtrlE,	// Function opcode
    input logic 	      FmtE,   		// Result Precision (1 for double, 0 for single)
+   input logic XSgnE, YSgnE, // signs of X and Y
+   input logic [10:0] XExpE, YExpE, // exponents of X and Y; zero-extended to 12 bits as exp1 / exp2
+   input logic [52:0] XManE, YManE, // mantissas of X and Y; bits [51:0] feed mantissaA / mantissaB
+   input logic XDenormE, YDenormE, // '1' if X / Y is a denormalized number (forwarded to the exception unit)
+   input logic XNormE, YNormE, // '1' if X / Y is normalized; selects AddOpANormE / AddOpBNormE
+   input logic XZeroE, YZeroE, // '1' if X / Y is zero (forwarded to the exception unit)
+   input logic XInfE, YInfE, // '1' if X / Y is infinite (forwarded to the exception unit)
+   input logic XNaNE, YNaNE, // '1' if X / Y is a not-a-number (forwarded to the exception unit)
+   input logic XSNaNE, YSNaNE, // '1' if X / Y is a signalling not-a-number (forwarded to the exception unit)
 
-   output logic [63:0] 	AddFloat1E, 
-   output logic [63:0] 	AddFloat2E,
    output logic [10:0] 	AddExponentE,
    output logic [10:0]	AddExpPostSumE,
    output logic [11:0]  AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0]
@@ -98,11 +111,9 @@ module fpuaddcvt1 (
    output logic [3:0]   AddSelInvE,
    output logic         AddCorrSignE,
    output logic 	      AddSignAE,
-   output logic	      AddOp1NormE, AddOp2NormE,
    output logic	      AddOpANormE, AddOpBNormE,
    output logic	      AddInvalidE,
    output logic 	      AddDenormInE,
-   output logic 	      AddConvertE,
    output logic         AddSwapE
    );
 
@@ -112,7 +123,7 @@ module fpuaddcvt1 (
    wire		    ZV_mantissaB;
 
    wire          P;
-   assign P = ~FmtE;
+   assign P = ~(FmtE^FOpCtrlE[1]); // XNOR: P = ~FmtE when FOpCtrlE[1] is 0, P = FmtE when FOpCtrlE[1] is 1
 
    wire [63:0] IntValue;
    wire [11:0] exp1, exp2;
@@ -130,22 +141,15 @@ module fpuaddcvt1 (
    wire 	      zeroB;
    wire [5:0]	align_shift;
 
-   // Convert the input operands to their appropriate forms based on 
-   // the orignal operands, the FOpCtrlE , and their precision P. 
-   // Single precision inputs are converted to double precision 
-   // and the sign of the first operand is set appropratiately based on
-   // if the operation is absolute value or negation. 
-
-   convert_inputs conv1 (.Float1(AddFloat1E), .Float2(AddFloat2E), .op1(FSrcXE), .op2(FSrcYE), .op_type(FOpCtrlE), .P);
-
    // Test for exceptions and return the "Invalid Operation" and
    // "Denormalized" Input Flags. The "AddSelInvE" is used in
    // the third pipeline stage to select the result. Also, AddOp1NormE
    // and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
    // sub is one if the effective operation is subtaction. 
 
-   exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, 
-		   AddFloat1E, AddFloat2E, FOpCtrlE);
+   exception exc1 (.Ztype(AddSelInvE), .Invalid(AddInvalidE), .Denorm(AddDenormInE), .Sub(sub), 
+   .XSgnE, .YSgnE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
+	.op_type(FOpCtrlE));
 
    // Perform Exponent Subtraction (used for alignment). For performance
    // both exponent subtractions are performed in parallel. This was 
@@ -153,25 +157,25 @@ module fpuaddcvt1 (
    // the two parallel additions. The input values are zero-extended to 12 
    // bits prior to performing the addition. 
 
-   assign exp1 = {1'b0, AddFloat1E[62:52]};
-   assign exp2 = {1'b0, AddFloat2E[62:52]};
+   assign exp1 = {1'b0, XExpE};
+   assign exp2 = {1'b0, YExpE};
    assign exp_diff1 = exp1 - exp2;
-   assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
+   assign exp_diff2 = AddDenormInE ? ({YSgnE, YExpE} - {XSgnE, XExpE}): exp2 - exp1;
 
    // The second operand (B) should be set to zero, if FOpCtrlE does not
    // specify addition or subtraction
-   assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
+   assign zeroB = FOpCtrlE[1];
 
    // Swapped operands if zeroB is not one and exp1 < exp2. 
    // Swapping causes exp2 to be used for the result exponent. 
    // Only the exponent of the larger operand is used to determine
    // the final result. 
    assign AddSwapE = exp_diff1[11] & ~zeroB;
-   assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
-   assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
-   assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
-   assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
-   assign AddSignAE     = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];   
+   assign AddExponentE = AddSwapE ? YExpE : XExpE;
+   assign AddExpPostSumE = AddSwapE ? YExpE : XExpE;
+   assign mantissaA = AddSwapE ? YManE[51:0] : XManE[51:0];
+   assign mantissaB = AddSwapE ? XManE[51:0] : YManE[51:0];
+   assign AddSignAE     = AddSwapE ? YSgnE : XSgnE;   
 
    // Leading-Zero Detector. Determine the size of the shift needed for
    // normalization. If sum_corrected is all zeros, the exp_valid is 
@@ -201,8 +205,8 @@ module fpuaddcvt1 (
    // and loss of sign information. The two bits to the right of the 
    // original mantissa form the "guard" and "round" bits that are used
    // to round the result. 
-   assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
-   assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
+   assign AddOpANormE = AddSwapE ? YNormE : XNormE;
+   assign AddOpBNormE = AddSwapE ? XNormE : YNormE;
    assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
    assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
 
@@ -223,19 +227,18 @@ module fpuaddcvt1 (
    // and the exponent value is left unchanged. 
    // Under denormalized cases, the exponent before the rounder is set to 1
    // if the normal shift value is 11.
-   assign AddConvertE       = ~FOpCtrlE[2] & FOpCtrlE[1];
-   assign mantissaA3    = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
+   assign mantissaA3    = AddDenormInE ? ({12'h0, mantissaA}) : {mantissaA1, 7'h0};
 
    // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to 
    // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six  
    // zeros. 
-   assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
-   assign mantissaB3[6]    = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
-   assign mantissaB3[5:0]  = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
+   assign mantissaB3[63:7] = AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}};
+   assign mantissaB3[6]    = AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB;
+   assign mantissaB3[5:0]  = AddDenormInE ? mantissaB[5:0] : 6'h0;
 
    // The sign of the result needs to be corrected if the true
    // operation is subtraction and the input operands were swapped. 
-   assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
+   assign AddCorrSignE = ~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
 
    // 64-bit Mantissa Adder/Subtractor
    cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
@@ -281,31 +284,31 @@ endmodule // fpadd
 
 
 module fpuaddcvt2 (
-   input [2:0] 	FrmM,		// Rounding mode - specify values 
-   input [3:0]	FOpCtrlM,	// Function opcode
-   input 	FmtM,   		// Result Precision (0 for double, 1 for single)
-   input [63:0] AddSumM, AddSumTcM,
-   input [63:0] 	 AddFloat1M, 
-   input [63:0] 	 AddFloat2M,
-   input [11:0]	 AddExp1DenormM, AddExp2DenormM,
-   input [10:0] 	 AddExponentM, AddExpPostSumM,
-   input [3:0] 	 AddSelInvM,
-   input		 AddOp1NormM, AddOp2NormM,
-   input		 AddOpANormM, AddOpBNormM,
-   input		 AddInvalidM,
-   input 	 AddDenormInM, 
-   input 	 AddSignAM, 
-   input         AddCorrSignM,
-   input 	 AddConvertM,
-   input          AddSwapM,
+   input logic [2:0] 	FrmM,		// Rounding mode - specify values 
+   input logic [2:0]	FOpCtrlM,	// Function opcode
+   input logic 	FmtM,   		// Result Precision (0 for double, 1 for single)
+   input logic [63:0] AddSumM, AddSumTcM,
+   input logic [11:0]	 AddExp1DenormM, AddExp2DenormM,
+   input logic [10:0] 	 AddExponentM, AddExpPostSumM,
+   input logic [3:0] 	 AddSelInvM,
+   input logic XSgnM, YSgnM,
+   input logic [52:0] XManM, YManM,
+   input logic [10:0] XExpM, YExpM,
+   input logic XNormM, YNormM,
+   input logic		 AddOpANormM, AddOpBNormM,
+   input logic		 AddInvalidM,
+   input logic 	 AddDenormInM, 
+   input logic 	 AddSignAM, 
+   input logic         AddCorrSignM,
+   input logic          AddSwapM,
 
-   output [63:0] FAddResM,	// Result of operation
-   output [4:0]  FAddFlgM   	// IEEE exception flags 
+   output logic [63:0] FAddResM,	// Result of operation
+   output logic [4:0]  FAddFlgM   	// IEEE exception flags 
 );
    wire 	 AddDenormM;   	// AddDenormM on input or output   
 
    wire          P;
-   assign P = ~FmtM;
+   assign P = ~(FmtM^FOpCtrlM[1]); // XNOR: P = ~FmtM when FOpCtrlM[1] is 0, P = FmtM when FOpCtrlM[1] is 1
 
    wire [10:0]   exp_pre;
    wire [63:0] 	 Result;   
@@ -338,15 +341,15 @@ module fpuaddcvt2 (
    //cases/conversion cases
    assign exp_pre       = AddDenormInM ?
                           ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
-                          : (AddConvertM ? 11'b10000111100 : AddExponentM);
+                          : AddExponentM;
 
 
    // Finds normal underflow result to determine whether to round final AddExponentM down
    // Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor
-   assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
-   assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
-   assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
-   assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
+   assign Float1_sum_comp = ~(XManM[51:0] > AddSumM[51:0]);
+   assign Float2_sum_comp = ~(YManM[51:0] > AddSumM[51:0]);
+   assign Float1_sum_tc_comp = ~(XManM[51:0] > AddSumTcM[51:0]);
+   assign Float2_sum_tc_comp = ~(YManM[51:0] > AddSumTcM[51:0]);
 
    // Determines the correct Float value to compare based on AddSwapM result
    assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
@@ -357,16 +360,16 @@ module fpuaddcvt2 (
 
    // If the signs are different and both operands aren't denormalized
    // the normal underflow bit is needed and therefore updated.
-   assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
+   assign normal_underflow = ((XSgnM ^ YSgnM) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0; // NOTE(review): gates on differing signs (XOR); the expression this replaces used XNOR (~^) - confirm the polarity change is intended
 
    // Determine the correct sign of the result
-   assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];   
+   assign sign_corr = (AddCorrSignM ^ AddSignAM) ^ AddSumM[63];   
    
    // If the AddSumM is negative, use its two complement instead. 
    // This value has to be 64-bits to correctly handle the 
    // case 10...00
-   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
-			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
+   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (XSgnM ~^ YSgnM) & FOpCtrlM[0] ) | ((XSgnM ^ YSgnM) & ~FOpCtrlM[0]) ))
+			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : (AddSumM[63] ? AddSumTcM : AddSumM);
 
    // Finds normal underflow result to determine whether to round final AddExponentM down
    //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
@@ -384,7 +387,7 @@ module fpuaddcvt2 (
    // be right shifted. It outputs the normalized AddSumM. 
    barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
   
-   assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
+   assign sum_norm_w_bypass = sum_norm;
 
    // Round the mantissa to a 52-bit value, with the leading one
    // removed. If the result is a single precision number, the actual 
@@ -397,10 +400,10 @@ module fpuaddcvt2 (
    // help in processor reservation station detection of load/stores. In
    // other words, the processor would like to know ahead of time that
    // if the result is an exception then don't load or store.
-   rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid, 
-		   AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
-		   AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
-		   AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
+   rounder round1 (.Result, .DenormIO, .Flags(FlagsIn), .rm(FrmM), .P, .OvEn(AddOvEnM), .UnEn(AddUnEnM), .exp_valid, 
+		   .sel_inv(AddSelInvM), .Invalid(AddInvalidM), .DenormIn(AddDenormInM), .Asign(sign_corr), .Aexp(exp_pre), .norm_shift, .A(sum_norm_w_bypass),
+		   .exponent_postsum(AddExpPostSumM), .A_Norm(XNormM), .B_Norm(YNormM), .exp_A_unmodified({XSgnM, XExpM}), .exp_B_unmodified({YSgnM, YExpM}),
+		   .normal_overflow(AddNormOvflowM), .normal_underflow, .swap(AddSwapM), .op_type(FOpCtrlM), .sum(AddSumM));
 
    // Store the final result and the exception flags in registers.
    assign FAddResM = Result;
diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv
index 6eead441..61a4af0a 100755
--- a/wally-pipelined/src/fpu/fctrl.sv
+++ b/wally-pipelined/src/fpu/fctrl.sv
@@ -9,7 +9,7 @@ module fctrl (
   output logic       FRegWriteD,  // FP register write enable
   output logic       FDivStartD,  // Start division or squareroot
   output logic [2:0] FResultSelD, // select result to be written to fp register
-  output logic [3:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
+  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
   output logic [1:0] FResSelD,    // select one of the results done in the memory stage
   output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
   output logic       FmtD,        // precision - single-0 double-1
@@ -24,82 +24,82 @@ module fctrl (
     case(OpD)
     // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
       7'b0000111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw
-                    3'b011:  ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld
-                    default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b1_0_000_000_00_00_0_0; // flw
+                    3'b011:  ControlsD = `FCTRLW'b1_0_000_001_00_00_0_0; // fld
+                    default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                   endcase
       7'b0100111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw
-                    3'b011:  ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd
-                    default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b0_0_000_010_00_00_0_0; // fsw
+                    3'b011:  ControlsD = `FCTRLW'b0_0_000_011_00_00_0_0; // fsd
+                    default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                   endcase
-      7'b1000011:   ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd
-      7'b1000111:   ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub
-      7'b1001011:   ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub
-      7'b1001111:   ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd
+      7'b1000011:   ControlsD = `FCTRLW'b1_0_001_000_00_00_0_0; // fmadd
+      7'b1000111:   ControlsD = `FCTRLW'b1_0_001_001_00_00_0_0; // fmsub
+      7'b1001011:   ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub
+      7'b1001111:   ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd
       7'b1010011: casez(Funct7D)
-                    7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd
-                    7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub
-                    7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul
-                    7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv
-                    7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt
+                    7'b00000??: ControlsD = `FCTRLW'b1_0_010_000_00_00_0_0; // fadd
+                    7'b00001??: ControlsD = `FCTRLW'b1_0_010_001_00_00_0_0; // fsub
+                    7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul
+                    7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv
+                    7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt
                     7'b00100??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn
-                                  3'b010:  ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_000_01_00_0_0; // fsgnj
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_001_01_00_0_0; // fsgnjn
+                                  3'b010:  ControlsD = `FCTRLW'b1_0_100_010_01_00_0_0; // fsgnjx
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                 endcase
                     7'b00101??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_111_10_00_0_0; // fmin
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_101_10_00_0_0; // fmax
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                 endcase
                     7'b10100??: case(Funct3D)
-                                  3'b010:  ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq
-                                  3'b001:  ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt
-                                  3'b000:  ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  3'b010:  ControlsD = `FCTRLW'b0_1_100_010_00_00_0_0; // feq
+                                  3'b001:  ControlsD = `FCTRLW'b0_1_100_001_00_00_0_0; // flt
+                                  3'b000:  ControlsD = `FCTRLW'b0_1_100_011_00_00_0_0; // fle
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                 endcase
                     7'b11100??: if (Funct3D == 3'b001)
-                                  ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass
-                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
-                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
-                                else                            ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  ControlsD = `FCTRLW'b0_1_100_000_00_10_0_0; // fclass
+                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_100_00_01_0_0; // fmv.x.w
+                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_101_00_01_0_0; // fmv.x.d
+                                else                            ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                     7'b1101000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.s.w
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.s.wu
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.s.l
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.s.lu
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.s.w
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.s.wu
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.s.l
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.s.lu
                                   default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
                                 endcase
                     7'b1100000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.s
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.s
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.s
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.s
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.s
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.s
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.s
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.s
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                 endcase
-                    7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
-                    7'b0100000: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.s.d
+                    7'b1111000: ControlsD = `FCTRLW'b1_0_100_000_00_00_0_0; // fmv.w.x
+                    7'b0100000: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.s.d
                     7'b1101001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.d.w
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.d.wu
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.d.l
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.d.lu
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.d.w
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.d.wu
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.d.l
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.d.lu
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                 endcase
                     7'b1100001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.d
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.d
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.d
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.d
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.d
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.d
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.d
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.d
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                 endcase
-                    7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
-                    7'b0100001: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.d.s
-                    default:    ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                    7'b1111001: ControlsD = `FCTRLW'b1_0_100_001_00_00_0_0; // fmv.d.x
+                    7'b0100001: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.d.s
+                    default:    ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                   endcase
-      default:      ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+      default:      ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
     endcase
 
   // unswizzle control bits
@@ -117,7 +117,7 @@ module fctrl (
   // Precision
   //    0-single
   //    1-double
-  assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
+  assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : FResultSelD == 3'b010 ? Funct7D[0]^FOpCtrlD[1] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
 
   // FResultSel:
   //    000 - ReadRes - load
diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv
index a8f845a6..17da8030 100644
--- a/wally-pipelined/src/fpu/fcvt.sv
+++ b/wally-pipelined/src/fpu/fcvt.sv
@@ -11,7 +11,7 @@ module fcvt (
     input logic             XDenormE,   // is X denormalized
     input logic [10:0]      BiasE,      // bias - depends on precision (max exponent/2)
     input logic [`XLEN-1:0] SrcAE,      // integer input
-    input logic [3:0]       FOpCtrlE,   // chooses which instruction is done (full list below)
+    input logic [2:0]       FOpCtrlE,   // chooses which instruction is done (full list below)
     input logic [2:0]       FrmE,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
     input logic             FmtE,       // precision 1 = double 0 = single
     output logic [63:0]     CvtResE,    // convert final result
@@ -43,27 +43,27 @@ module fcvt (
     logic               RoundSgn;           // sign of the rounded result
 
     // FOpCtrlE:
-      //  fcvt.w.s  = 0010
-      //  fcvt.wu.s = 0110
-      //  fcvt.s.w  = 0001
-      //  fcvt.s.wu = 0101
-      //  fcvt.l.s  = 1010
-      //  fcvt.lu.s = 1110
-      //  fcvt.s.l  = 1001
-      //  fcvt.s.lu = 1101
-      //  fcvt.w.d  = 0010 
-      //  fcvt.wu.d = 0110
-      //  fcvt.d.w  = 0001
-      //  fcvt.d.wu = 0101
-      //  fcvt.l.d  = 1010
-      //  fcvt.lu.d = 1110
-      //  fcvt.d.l  = 1001
-      //  fcvt.d.lu = 1101
+      //  fcvt.w.s  = 001
+      //  fcvt.wu.s = 011
+      //  fcvt.s.w  = 000
+      //  fcvt.s.wu = 010
+      //  fcvt.l.s  = 101
+      //  fcvt.lu.s = 111
+      //  fcvt.s.l  = 100
+      //  fcvt.s.lu = 110
+      //  fcvt.w.d  = 001 
+      //  fcvt.wu.d = 011
+      //  fcvt.d.w  = 000
+      //  fcvt.d.wu = 010
+      //  fcvt.l.d  = 101
+      //  fcvt.lu.d = 111
+      //  fcvt.d.l  = 100
+      //  fcvt.d.lu = 110
-      //  {long, unsigned, to int, from int}
+      //  {long, unsigned, to int}
    
     // calculate signals based off the input and output's size
-    assign Res64 = (FOpCtrlE[1]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[0]);
-    assign In64 =  (FOpCtrlE[0]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[1]);
+    assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]);
+    assign In64 =  (~FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&FOpCtrlE[0]);
     assign SubBits = In64 ? 8'd64 : 8'd32;
     assign Bits = Res64 ? 8'd64 : 8'd32;
 
@@ -73,11 +73,11 @@ module fcvt (
 ////////////////////////////////////////////////////////
 
     // position the input in the most significant bits
-    assign IntIn = FOpCtrlE[3] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0};
+    assign IntIn = FOpCtrlE[2] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0};
     // make the integer positive
-    assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
+    assign PosInt = IntIn[64-1]&~FOpCtrlE[1] ? -IntIn : IntIn;
     // determine the integer's sign
-    assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
+    assign ResSgn = ~FOpCtrlE[1] ? IntIn[64-1] : 1'b0;
     
 	// Leading one detector
 	logic [8:0]	i;
@@ -97,8 +97,8 @@ module fcvt (
 
 
     // select the shift value and amount based on operation (to fp or int)
-    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
-    assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XManE} : {PosInt, 52'b0};
+    assign ShiftCnt = FOpCtrlE[0] ? ExpVal : LZResP;
+    assign ShiftVal = FOpCtrlE[0] ? {{64-2{1'b0}}, XManE} : {PosInt, 52'b0};
 
 	// if shift = -1 then shift one bit right for gaurd bit (right shifting twice never rounds)
 	// if the shift is negitive add a bit for sticky bit calculation
@@ -111,35 +111,35 @@ module fcvt (
     // calculate sticky bit 
     //  - take into account the possible right shift from before
     //  - the sticky bit calculation covers three diffrent sizes depending on the opperation
-    assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
+    assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (~FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (~FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
 
     
     // determine guard, round, and least significant bit of the result
-    assign Guard = FOpCtrlE[1] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
-    assign Round = FOpCtrlE[1] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
-    assign LSB = FOpCtrlE[1] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
+    assign Guard = FOpCtrlE[0] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
+    assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
+    assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
 
     always_comb begin
         // Determine if you add 1
         case (FrmE)
             3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
             3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
-            3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
+            3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[0]) | (ResSgn&~FOpCtrlE[0]);//round down
+            3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[0]) | (~ResSgn&~FOpCtrlE[0]);//round up
             3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
             default: CalcPlus1 = 1'bx;
         endcase
     end
 
     // dont tound if the result is exact
-    assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]);
+    assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[0]);
 
     // round the shifted mantissa
     assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
     assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 :  {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
 
     // fit the rounded result into the appropriate size and take the 2's complement if needed
-     assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : 
+     assign Rounded = Res64 ? XSgnE&FOpCtrlE[0] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : 
 			      XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
 
     // extract the MSB and Sign for later use (will be used to determine underflow and overflow)
@@ -148,29 +148,29 @@ module fcvt (
 
 
     // check if the result overflows
-    assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;
+    assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[1]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;
 
     // check if the result underflows (this calculation changes if the result is signed or unsigned)
-    assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]);    // assign CvtIntRes =  (XSgnE | ShiftCnt[12]) ? {64{1'b0}}  : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
+    assign Uf = FOpCtrlE[1] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]);    // assign CvtIntRes =  (XSgnE | ShiftCnt[12]) ? {64{1'b0}}  : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
     
     // calculate the result's sign
-    assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
+    assign SgnRes = ~FOpCtrlE[2] & FOpCtrlE[0];
 
     // select the integer result
-    assign CvtIntRes = Of ? FOpCtrlE[2] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
-                    Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
+    assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
+                    Uf ? FOpCtrlE[1] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
 		            Rounded[64-1:0];
 
     // select the floating point result            
     assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]};
 
     // select the result
-    assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
+    assign CvtResE = ~FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
 
     // calculate the flags
     //      - to int only sets the invalid flag
     //      - from int only sets the inexact flag
-    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};
+    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]};
 
 
 
diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv
index 0601db06..f651d237 100644
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@@ -23,7 +23,7 @@
 ///////////////////////////////////////////
 
 `include "wally-config.vh"
-// `include "../../../config/rv64icfd/wally-config.vh"
+//  `include "../../../config/rv64icfd/wally-config.vh"
 
 module fma(
     input logic                 clk,
@@ -106,6 +106,7 @@ module fma1(
     logic [`NE+1:0]     AlignCnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
     logic [4*`NF+5:0]   ZManShifted;        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
     logic [4*`NF+5:0]   ZManPreShifted;     // input to the alignment shifter U(NF+5.3NF+1)
+    logic [`NE-2:0]     Denorm;             // exponent offset added for a denormalized input (1 if double, -126+1023 if single)
 
     ///////////////////////////////////////////////////////////////////////////////
     // Calculate the product
@@ -116,8 +117,9 @@ module fma1(
     ///////////////////////////////////////////////////////////////////////////////
    
     // verilator lint_off WIDTH
+    assign Denorm = FmtE ? 1 : -126+1023;
     assign ProdExpE = (XZeroE|YZeroE) ? 0 :
-                 XExpE + YExpE - BiasE + XDenormE + YDenormE;
+                 XExpE + YExpE - BiasE + ({`NE-1{XDenormE}}&Denorm) + ({`NE-1{YDenormE}}&Denorm);
     // verilator lint_on WIDTH
 
     // Calculate the product's mantissa
@@ -133,7 +135,7 @@ module fma1(
     //      - positive means the product is larger, so shift Z right
     //      - Denormal numbers have an an exponent value of 1, however they are
     //        represented with an exponent of 0. add one to the exponent if it is a denormal number
-    assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
+    assign AlignCnt = ProdExpE - (ZExpE + ({`NE-1{ZDenormE}}&Denorm));
 
     // Defualt Addition without shifting
     //          |   54'b0    |  106'b(product)  | 2'b0 |
@@ -320,7 +322,9 @@ module fma2(
     //assign FracLen = `NF;
 
     // Determine if the result is denormal
-    assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
+    logic [`NE+1:0] SumExpTmpTmp;
+    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
+    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
 
     assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
 
@@ -511,7 +515,7 @@ module fma2(
                                     ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
                                                                                                                           {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
     assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
-    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
+    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
     assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + (CalcPlus1&(AddendStickyM|FrmM[1])) : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
     assign FMAResM = XNaNM ? XNaNResult :
                         YNaNM ? YNaNResult :
diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv
index 19ef41b9..a2534149 100755
--- a/wally-pipelined/src/fpu/fpdiv.sv
+++ b/wally-pipelined/src/fpu/fpdiv.sv
@@ -75,15 +75,8 @@ module fpdiv (
    // div/sqrt
          //  fdiv  = 0
          //  fsqrt = 1
-
-   // Convert the input operands to their appropriate forms based on 
-   // the orignal operands, the op_type , and their precision P. 
-   // Single precision inputs are converted to double precision 
-   // and the sign of the first operand is set appropratiately based on
-   // if the operation is absolute value or negation.   
-   convert_inputs_div conv1 (.op1, .op2, .op_type, .P, 
-                           // outputs:
-                           .Float1, .Float2b(Float2));
+   assign Float1 = op1;
+   assign Float2 = op_type ? op1 : op2;   
 
    // Test for exceptions and return the "Invalid Operation" and
    // "Denormalized" Input Flags. The "sel_inv" is used in
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index 4e7d898e..04823580 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -57,7 +57,7 @@ module fpu (
   //                single stored in a double: | 32 1s | single precision value |
   //    - sets the underflow after rounding
   
-  generate if (`F_SUPPORTED | `D_SUPPORTED) begin 
+  generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu
 
   // control signals
 	logic 		  FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
@@ -67,7 +67,7 @@ module fpu (
 	logic 		  FWriteIntD;                         // Write to integer register
 	logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
 	logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register
-	logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM;           // Select which opperation to do in each component
+	logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM;           // Select which operation to do in each component
 	logic [1:0] FResSelD, FResSelE, FResSelM;           // Select one of the results that finish in the memory stage
 	logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;  // Select the result written to the integer resister
 	logic [4:0] Adr1E, Adr2E, Adr3E;                    // adresses of each input
@@ -97,7 +97,8 @@ module fpu (
 	logic 		   XInfE, YInfE, ZInfE;           // is the input infinity - execute stage
 	logic 		   XInfM, YInfM, ZInfM;           // is the input infinity - memory stage
 	logic 		   XExpMaxE;                      // is the exponent all ones (max value)
-	logic 		   XNormE;                        // is X normal
+	logic 		   XNormE,YNormE;                 // is normal
+	logic 		   XNormM,YNormM;                 // is normal
 	
 	
 	// result and flag signals
@@ -171,7 +172,7 @@ module fpu (
 	flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
 	flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                                                        {Adr1E,         Adr2E,         Adr3E});
-	flopenrc #(18) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+	flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, 
 				  {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
 				  {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
 	
@@ -203,11 +204,11 @@ module fpu (
   // unpacking unit
   //    - splits FP inputs into their various parts
   //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
-	unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, 
+	unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FResultSelE, .FmtE, 
                       // outputs:
                       .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
                       .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
-                      .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
+                      .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE, .YNormE);
 
   // FMA
   //    - two stage FMA
@@ -222,7 +223,7 @@ module fpu (
 		 .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
      .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
      .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-		 .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), 
+		 .FOpCtrlE, .FOpCtrlM, 
 		 .FmtE, .FmtM, .FrmM, 
      // outputs:
      .FMAFlgM, .FMAResM);
@@ -240,10 +241,10 @@ module fpu (
   //    - if not captured any forwarded inputs will change durring computation
   //        - this problem is caused by stalling the execute stage
   //    - the other units don't have this problem, only div/sqrt stalls the execute stage
-	flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
+	flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
 				   .en(1'b1), .clear(FDivSqrtDoneE),
 				   .reset(reset),  .clk(FDivBusyE));
-	flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
+	flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
 				   .en(1'b1), .clear(FDivSqrtDoneE),
 				   .reset(reset),  .clk(FDivBusyE));
 	
@@ -261,6 +262,8 @@ module fpu (
   //*** remove uneeded logic
   //*** change to use the unpacking unit if possible
 	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
+   .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
+   .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
                   // outputs:
                   .FAddResM, .FAddFlgM);
 	
@@ -269,7 +272,7 @@ module fpu (
   //    - writes to FP file durring min/max instructions
   //    - other comparisons write a 1 or 0 to the integer register
 	fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), 
-            .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), 
+            .FSrcXE, .FSrcYE, .FOpCtrlE, 
             .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, 
             // outputs:
 		        .Invalid(CmpNVE), .CmpResE);
@@ -325,9 +328,9 @@ module fpu (
   
 	flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
 	
-	flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
-				 {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
-				 {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
+	flopenrc #(18) EMCtrlReg(clk, reset, FlushM, ~StallM,
+				 {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE},
+				 {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM});
 	
 	
 
diff --git a/wally-pipelined/src/fpu/rounder_denorm.sv b/wally-pipelined/src/fpu/rounder_denorm.sv
index 2e1ad07e..3c9a0e91 100755
--- a/wally-pipelined/src/fpu/rounder_denorm.sv
+++ b/wally-pipelined/src/fpu/rounder_denorm.sv
@@ -1,4 +1,4 @@
-// The rounder takes as inputs a 64-bit value to be rounded, A, the 
+// The rounder takes as inputs a 64-bit value to be rounded, A, the 
 // exponent of the value to be rounded, the sign of the final result, Sign, 
 // the precision of the results, P, and the two-bit rounding mode, rm. 
 // It produces a rounded 52-bit result, Z, the exponent of the rounded 
@@ -17,38 +17,34 @@
 // where , denotes the rounding boundary. S is the logical OR of all the
 // bits to the right of R. 
  
-module rounder (Result, DenormIO, Flags, rm, P, OvEn, 
-		UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp, 
-		norm_shift, A, exponent_postsum, A_Norm, B_Norm, exp_A_unmodified, exp_B_unmodified,
-		normal_overflow, normal_underflow, swap, op_type, sum);
-
-   input  [2:0]  rm;
-   input         P;
-   input         OvEn;
-   input         UnEn;
-   input         exp_valid;
-   input [3:0] 	 sel_inv;
-   input	 Invalid;
-   input	 DenormIn;
-   input         convert;
-   input         Asign;
-   input [10:0]  Aexp;
-   input [5:0] 	 norm_shift;
-   input [63:0]  A;
-   input [10:0]  exponent_postsum;
-   input 	 A_Norm;
-   input 	 B_Norm;
-   input [11:0]  exp_A_unmodified;
-   input [11:0]  exp_B_unmodified;
-   input 	 normal_overflow;
-   input 	 normal_underflow;
-   input 	 swap;
-   input [3:0]	 op_type;
-   input [63:0]  sum;
+module rounder (
+   input logic  [2:0]  rm,
+   input logic         P,
+   input logic         OvEn,
+   input logic         UnEn,
+   input logic         exp_valid,
+   input logic [3:0] 	 sel_inv,
+   input logic	 Invalid,
+   input logic	 DenormIn,
+   input logic         Asign,
+   input logic [10:0]  Aexp,
+   input logic [5:0] 	 norm_shift,
+   input logic [63:0]  A,
+   input logic [10:0]  exponent_postsum,
+   input logic 	 A_Norm,
+   input logic 	 B_Norm,
+   input logic [11:0]  exp_A_unmodified,
+   input logic [11:0]  exp_B_unmodified,
+   input logic 	 normal_overflow,
+   input logic 	 normal_underflow,
+   input logic 	 swap,
+   input logic [2:0]	 op_type,
+   input logic [63:0]  sum,
    
-   output [63:0] Result;
-   output 	 DenormIO;
-   output [4:0]  Flags;
+   output logic [63:0] Result,
+   output logic 	 DenormIO,
+   output logic [4:0]  Flags
+);
    
    wire          Rsign;
    wire 	 Sticky_out;
@@ -87,7 +83,6 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
    wire 	 Cout_overflow;
    wire		 Texp_l7z;
    wire		 Texp_l7o;
-   wire		 OvCon;
 
    // Determine the sticky bits for double and single precision
    assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0];
@@ -152,7 +147,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
    assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z));
    
    // Set the overflow and underflow flags. They should not be set if
-   // the input was infinite or NaN or the output of the adder is zero.
+   // the input was infinite or NaN or the output of the adder is zero.
    // 00 = Valid
    // 10 = NaN
    assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
@@ -164,7 +159,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
    assign OverFlow  = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid;
 
    // The DenormIO is set if underflow has occurred or if their was a
-   // denormalized input. 
+   // denormalized input. 
    assign DenormIO = DenormIn | UnderFlow;
 
    // The final result is Inexact if any rounding occurred ((i.e., R or S 
@@ -192,7 +187,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
    // -0 + +0 = -0 (for RD) 
    assign Rzero = ~exp_valid | UnderFlow;
    assign Rsign = DenormIn ?
-		  ( ~(op_type[2] | op_type[1] | op_type[0]) ? 
+		  ( ~(op_type[1] | op_type[0]) ? 
 		  ( (sum[63] & (A_Norm | B_Norm) & (exp_A_unmodified[11] ^ exp_B_unmodified[11])) ?
 		  ~Asign : Asign) 
    		  : ( ((A_Norm ^ B_Norm) & (exp_A_unmodified[11] ~^ exp_B_unmodified[11])) ?
@@ -202,7 +197,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
      	          (sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] |
 	          sel_inv[2]&sel_inv[1]&~sel_inv[0] |		  
 	          ~exp_valid&rm[1]&rm[0]&~sel_inv[2] | 
-	          UnderFlow&rm[1]&rm[0]) & ~convert) & ~sel_inv[3]) |
+	          UnderFlow&rm[1]&rm[0])) & ~sel_inv[3]) |
 		  (Asign & sel_inv[3]) );
    
    // The exponent of the final result is zero if the final result is 
@@ -218,7 +213,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
    assign VeryLarge = OverFlow & ~OvEn;
    assign Infinite   = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]);
    assign Largest = VeryLarge & Round_zero;
-   assign Adj_exp = OverFlow & OvEn & ~convert;
+   assign Adj_exp = OverFlow & OvEn;
    assign Rexp[10:1] = ({10{~Valid}} | 
 			{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], 
 			 (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | 
@@ -230,7 +225,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
    // Depending on the operation and the signs of the orignal operands,
    // underflow may or may not be needed to round.
    assign Rexp_denorm = DenormIn ? 
-			((~op_type[2] & ~op_type[1] & op_type[0]) ? 
+			((~op_type[1] & op_type[0]) ? 
 				( ((A_Norm != B_Norm) & (exp_A_unmodified[11] == exp_B_unmodified[11])) ? 
 					( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) 
 					: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) 
@@ -238,7 +233,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
 					( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) 
 					: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) 
 				) : 
-			(op_type[3]) ? exp_A_unmodified[10:0] : Rexp; //KEP used to be all of exp_A_unmodified
+			Rexp; //KEP used to be all of exp_A_unmodified
 
    // If the result is zero or infinity, the mantissa is all zeros. 
    // If the result is NaN, the mantissa is 10...0
@@ -256,10 +251,9 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
    // for the final result. A double precision result is returned if 
    // overflow has occurred, the overflow trap is enabled, and a conversion
    // is being performed. 
-   assign OvCon = OverFlow & OvEn & convert;
 
-   assign Result = (op_type[3]) ? {A[63:0]} : (DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : ((P&~OvCon) ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]}
-	           : {Rsign, Rexp, Rmant}));
+   assign Result = DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : (P ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]}
+	           : {Rsign, Rexp, Rmant});
 
 endmodule // rounder
 
diff --git a/wally-pipelined/src/fpu/unpacking.sv b/wally-pipelined/src/fpu/unpacking.sv
index f1f595de..3913b06b 100644
--- a/wally-pipelined/src/fpu/unpacking.sv
+++ b/wally-pipelined/src/fpu/unpacking.sv
@@ -1,11 +1,12 @@
 module unpacking ( 
     input logic  [63:0] X, Y, Z,
     input logic         FmtE,
+    input logic  [2:0]  FResultSelE,
     input logic  [2:0]  FOpCtrlE,
     output logic        XSgnE, YSgnE, ZSgnE,
     output logic [10:0] XExpE, YExpE, ZExpE,
     output logic [52:0] XManE, YManE, ZManE,
-    output logic XNormE,
+    output logic XNormE, YNormE,
     output logic XNaNE, YNaNE, ZNaNE,
     output logic XSNaNE, YSNaNE, ZSNaNE,
     output logic XDenormE, YDenormE, ZDenormE,
@@ -25,12 +26,9 @@ module unpacking (
     assign YSgnE = FmtE ? Y[63] : Y[31];
     assign ZSgnE = FmtE ? Z[63] : Z[31];
 
-    assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
-    assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
-    assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};//{Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]}; 
-/*    assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; // *** maybe convert to full number of bits here?
-    assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};
-    assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};*/
+    assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
+    assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
+    assign ZExpE = FmtE ? Z[62:52] : {Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]}; 
 
     assign XFracE = FmtE ? X[51:0] : {X[22:0], 29'b0};
     assign YFracE = FmtE ? Y[51:0] : {Y[22:0], 29'b0};
@@ -57,6 +55,7 @@ module unpacking (
     assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23];
   
     assign XNormE = ~(XExpMaxE|XExpZero);
+    assign YNormE = ~YExpZero; // only used in addcvt - checks inf and NaN separately
     
     assign XNaNE = XExpMaxE & ~XFracZero;
     assign YNaNE = YExpMaxE & ~YFracZero;
@@ -78,7 +77,6 @@ module unpacking (
     assign YZeroE = YExpZero & YFracZero;
     assign ZZeroE = ZExpZero & ZFracZero;
 
-    assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed?
-    // assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
+    assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
 
 endmodule
\ No newline at end of file