Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2025-02-11 06:05:49 +00:00 · 2021-07-30 17:57:13 -05:00 · 2021-07-30 17:57:13 -05:00 · b7fc737d93
commit b7fc737d93
parent 245e7014b3 d8ca70fc45
13 changed files with 298 additions and 345 deletions
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@ -48,7 +48,7 @@ assign FOpCtrlE = 3'b0;
 // up - 011
 // nearest max mag - 100  
 assign FrmE = 3'b000;
-assign FmtE = 1'b0;
+assign FmtE = 1'b1;
    logic  [`FLEN-1:0] X, Y, Z;
    // logic         FmtE;
@ -76,9 +76,9 @@ assign FmtE = 1'b0;
    assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31];
    assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31];
-    assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
+    assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
-    assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
+    assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
-    assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]};//{Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; 
+    assign ZExpE = FmtE ? Addend[62:52] : {Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; 
    assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0};
    assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0};
@ -122,7 +122,7 @@ assign FmtE = 1'b0;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;
-    assign BiasE = FmtE ? {1'b0, {`NE-1{1'b1}}} : 13'h7f;
+    assign BiasE = 13'h3ff;
 assign	wnan = FmtE ? &FMAResM[`FLEN-2:`NF] && |FMAResM[`NF-1:0] : &FMAResM[30:23] && |FMAResM[22:0]; 
 // assign	XNaNE = FmtE ? &X[62:52] && |X[51:0] : &X[62:55] && |X[54:32]; 
@ -203,7 +203,7 @@ always @(posedge clk)
 		if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
 		if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
        errors = errors + 1;
-	  //if (errors == 10)
+	  if (errors == 10)
 		$stop;
    end
 vectornum = vectornum + 1;
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
@ -1,3 +1,3 @@
-testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
+testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace
--- a/wally-pipelined/src/fpu/convert_inputs.sv
+++ b/wally-pipelined/src/fpu/convert_inputs.sv
@ -8,7 +8,7 @@
 module convert_inputs(
   input [63:0]  op1,      // 1st input operand (A)
   input [63:0]  op2,      // 2nd input operand (B)
-   input [3:0]   op_type,  // Function opcode
+   input [2:0]   op_type,  // Function opcode
   input 	     P,        // Result Precision (0 for double, 1 for single)
   output [63:0] Float1,	// Converted 1st input operand
@ -16,8 +16,6 @@ module convert_inputs(
 );
   wire 	 conv_SP;   // Convert from SP to DP
   wire 	 negate;    // Operation is negation
   wire 	 abs_val;   // Operation is absolute value
   wire 	 Zexp1;		// One if the exponent of op1 is zero
   wire 	 Zexp2;		// One if the exponent of op2 is zero
   wire 	 Oexp1;		// One if the exponent of op1 is all ones
@ -25,7 +23,7 @@ module convert_inputs(
   // Convert from single precision to double precision if (op_type is 11X
   // and P is 0) or (op_type is not 11X and P is one). 
-   assign conv_SP = (op_type[2]&op_type[1]) ^ P;
+   assign conv_SP = ~P;
   // Test if the input exponent is zero, because if it is then the
   // exponent of the converted number should be zero. 
@ -40,17 +38,14 @@ module convert_inputs(
   assign Float1[28:0] = op1[28:0] & {29{~conv_SP}};
   // Conditionally convert op2. Lower 29 bits are zero for single precision. 
-   assign Float2[62:29] = conv_SP ? {op2[30], 
+   assign Float2[62:29] = conv_SP ? {op2[30], {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
 				     {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
 			  : op2[62:29];
   assign Float2[28:0] = op2[28:0] & {29{~conv_SP}};
   // Set the sign of Float1 based on its original sign and if the operation
   // is negation (op_type = 101) or absolute value (op_type = 100)
-   assign negate  = op_type[2] & ~op_type[1] & op_type[0];
+   assign Float1[63]  = conv_SP ? op1[31] : op1[63];
   assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0]; //*** remove abs_val
   assign Float1[63]  = conv_SP ? (op1[31] ^ negate) & ~abs_val : (op1[63] ^ negate) & ~abs_val;
   assign Float2[63]  = conv_SP ? op2[31] : op2[63];
 endmodule // convert_inputs
--- a/wally-pipelined/src/fpu/exception.sv
+++ b/wally-pipelined/src/fpu/exception.sv
@ -1,95 +1,58 @@
 // Exception logic for the floating point adder. Note: We may 
 // actually want to move to where the result is computed.
-module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
+module exception (
-   input [63:0] A;		// 1st input operand (op1)
+   input logic [2:0] 	op_type,   	// Function opcode
-   input [63:0] B;		// 2nd input operand (op2)
+   input logic XSgnE, YSgnE,
-   input [3:0] 	op_type;   	// Function opcode
+   // input logic [52:0] XManE, YManE,
-   output [3:0] Ztype;		// Indicates type of result (Z)
+   input logic XDenormE, YDenormE,
-   output 	Invalid;	// Invalid operation exception
+   input logic XNormE, YNormE,
-   output 	Denorm;		// Denormalized input
+   input logic XZeroE, YZeroE,
-   output       ANorm;          // A is not zero or Denorm
+   input logic XInfE, YInfE,
-   output       BNorm;          // B is not zero or Denorm
+   input logic XNaNE, YNaNE,
-   output       Sub;		// The effective operation is subtraction
+   input logic XSNaNE, YSNaNE,
-   wire		AzeroM;	 	// '1' if the mantissa of A is zero
+   output logic [3:0] Ztype,		// Indicates type of result (Z)
-   wire		BzeroM;		// '1' if the mantissa of B is zero
+   output logic 	Invalid,	// Invalid operation exception
-   wire		AzeroE;	 	// '1' if the exponent of A is zero
+   output logic 	Denorm,		// Denormalized input
-   wire		BzeroE;		// '1' if the exponent of B is zero
+   output logic       Sub		// The effective operation is subtraction
-   wire		AonesE;	 	// '1' if the exponent of A is all ones
+);
   wire		BonesE;		// '1' if the exponent of B is all ones
   wire		ADenorm; 	// '1' if A is a denormalized number
   wire		BDenorm; 	// '1' if B is a denormalized number
   wire		AInf;	 	// '1' if A is infinite
   wire		BInf;	 	// '1' if B is infinite
   wire		AZero;	 	// '1' if A is 0
   wire		BZero;	 	// '1' if B is 0
   wire		ANaN;	 	// '1' if A is a not-a-number
   wire		BNaN; 		// '1' if B is a not-a-number
   wire		ASNaN;	 	// '1' if A is a signalling not-a-number
   wire		BSNaN;	 	// '1' if B is a signalling not-a-number
   wire		ZQNaN;	 	// '1' if result Z is a quiet NaN
   wire		ZPInf;	 	// '1' if result Z is positive infinity
   wire		ZNInf;	 	// '1' if result Z is negative infinity
   wire         add_sub;	// '1' if operation is add or subtract
   wire 	converts;       // See if there are any converts   
   parameter [51:0]  fifty_two_zeros = 52'h0000000000000; // Use parameter?
   // Is this instruction a convert
-   assign converts      = ~(~op_type[1] & ~op_type[2]);
+   assign converts      = op_type[1];
   // Determine if mantissas are all zeros
   assign AzeroM = (A[51:0] == fifty_two_zeros);
   assign BzeroM = (B[51:0] == fifty_two_zeros);
   // Determine if exponents are all ones or all zeros 
   assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
   assign BonesE = B[62]&B[61]&B[60]&B[59]&B[58]&B[57]&B[56]&B[55]&B[54]&B[53]&B[52];
   assign AzeroE = ~(A[62]|A[61]|A[60]|A[59]|A[58]|A[57]|A[56]|A[55]|A[54]|A[53]|A[52]);
   assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]);
   // Determine special cases. Note: Zero is not really a special case. 
   assign ADenorm = AzeroE & ~AzeroM;
   assign BDenorm = BzeroE & ~BzeroM;
   assign AInf = AonesE & AzeroM;
   assign BInf = BonesE & BzeroM;
   assign ANaN = AonesE & ~AzeroM;
   assign BNaN = BonesE & ~BzeroM;
   assign ASNaN = ANaN & ~A[51];
   assign BSNaN = BNaN & ~B[51];
   assign AZero = AzeroE & AzeroM;
   assign BZero = BzeroE & BzeroE;
   // A and B are normalized if their exponents are not zero. 
   assign ANorm = ~AzeroE;
   assign BNorm = ~BzeroE;
   // An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
   // or (A and B are both Infinite and the "effective operation" is 
   // subtraction). 
-   assign add_sub = ~op_type[2] & ~op_type[1];
+   assign add_sub = ~op_type[1];
-   assign Invalid = (ASNaN | BSNaN | 
+   assign Invalid = (XSNaNE | YSNaNE | (add_sub & XInfE & YInfE & (XSgnE^YSgnE^op_type[0]))) & ~converts;
 		     (add_sub & AInf & BInf & (A[63]^B[63]^op_type[0]))) & ~converts;
   // The Denorm flag is set if (A is denormalized and the operation is not integer 
   // conversion) or (B is denormalized and the operation is addition or subtraction). 
-   assign Denorm = ADenorm&(op_type[2]|~op_type[1]) | BDenorm & add_sub;
+   assign Denorm = XDenormE | YDenormE & add_sub;
   // The result is a quiet NaN if (an "Invalid Operation" exception occurs) 
   // or (A is a NaN) or (B is a NaN and the operation uses B).
-   assign ZQNaN = Invalid | ANaN | (BNaN & add_sub);
+   assign ZQNaN = Invalid | XNaNE | (YNaNE & add_sub);
   // The result is +Inf if ((A is +Inf) or (B is -Inf and the operation is
   // subtraction) or (B is +Inf and the operation is addition)) and (the
   // result is not a quiet NaN).  
-   assign ZPInf = (AInf&A[63] | add_sub&BInf&(~B[63]^op_type[0]))&~ZQNaN;
+   assign ZPInf = (XInfE&XSgnE | add_sub&YInfE&(~YSgnE^op_type[0]))&~ZQNaN;
   // The result is -Inf if ((A is -Inf) or (B is +Inf and the operation is
   // subtraction) or (B is -Inf and the operation is addition)) and the
   // result is not a quiet NaN.  
-   assign ZNInf = (AInf&~A[63] | add_sub&BInf&(B[63]^op_type[0]))&~ZQNaN;
+   assign ZNInf = (XInfE&~XSgnE | add_sub&YInfE&(YSgnE^op_type[0]))&~ZQNaN;
   // Set the type of the result as follows:
   // (needs optimization - got lazy or was late)
@ -102,19 +65,19 @@ module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type);
   //  0101     +Bzero and -Azero (and vice-versa)
   //  1000     Convert SP to DP (and vice-versa)
-   assign Ztype[0] = ((ZQNaN | ZPInf) & ~(~op_type[2] & op_type[1])) | 
+   assign Ztype[0] = (ZQNaN | ZPInf) | 
-		     ((AZero & BZero & (A[63]^B[63]^op_type[0])) 
+		     ((XZeroE & YZeroE & (XSgnE^YSgnE^op_type[0])) 
 		      & ~converts);
-   assign Ztype[1] = ((ZNInf | ZPInf) & ~(~op_type[2] & op_type[1])) | 
+   assign Ztype[1] = (ZNInf | ZPInf) | 
-		     (((AZero & BZero & A[63] & B[63] & ~op_type[0]) |
+		     (((XZeroE & YZeroE & XSgnE & YSgnE & ~op_type[0]) |
-		       (AZero & BZero & A[63] & ~B[63] & op_type[0])) 
+		       (XZeroE & YZeroE & XSgnE & ~YSgnE & op_type[0])) 
 		      & ~converts);
-   assign Ztype[2] = ((AZero & BZero & ~op_type[1] & ~op_type[2]) 
+   assign Ztype[2] = ((XZeroE & YZeroE & ~op_type[1]) 
 		      & ~converts);
-   assign Ztype[3] = (op_type[1] & op_type[2] & ~op_type[0]);
+   assign Ztype[3] = (op_type[1] & ~op_type[0]);
   // Determine if the effective operation is subtraction
-   assign Sub = ~(op_type[3] & ~op_type[0]) & ( (op_type[3] & op_type[0]) | (add_sub & (A[63]^B[63]^op_type[0])) );
+   assign Sub = add_sub & (XSgnE^YSgnE^op_type[0]);
 endmodule // exception
--- a/wally-pipelined/src/fpu/exception_div.sv
+++ b/wally-pipelined/src/fpu/exception_div.sv
@ -27,7 +27,7 @@ module exception_div (
   logic 	      ZInf;	 	// '1' if result Z is an infinity
   logic 	      Zero;             // '1' if result is zero   
-
+   //***take this module out and add more registers or just recalculate it all
   // Determine if mantissas are all zeros
   assign AzeroM = (A[51:0] == 52'h0);
   assign BzeroM = (B[51:0] == 52'h0);
--- a/wally-pipelined/src/fpu/faddcvt.sv
+++ b/wally-pipelined/src/fpu/faddcvt.sv
@ -33,9 +33,22 @@ module faddcvt(
   input logic          StallM,     // stall the memory stage
   input logic  [63:0]  FSrcXE,		// 1st input operand (A)
   input logic  [63:0]  FSrcYE,		// 2nd input operand (B)
-   input logic  [3:0]   FOpCtrlE, FOpCtrlM,	// Function opcode
+   input logic  [2:0]   FOpCtrlE, FOpCtrlM,	// Function opcode
   input logic          FmtE, FmtM,   	// Result Precision (0 for double, 1 for single)
   input logic  [2:0] 	FrmM,		      // Rounding mode - specify values 
   input logic XSgnE, YSgnE,
   input logic [52:0] XManE, YManE,
   input logic [10:0] XExpE, YExpE,
   input logic XSgnM, YSgnM,
   input logic [52:0] XManM, YManM,
   input logic [10:0] XExpM, YExpM,
   input logic XDenormE, YDenormE,
   input logic XNormE, YNormE,
   input logic XNormM, YNormM,
   input logic XZeroE, YZeroE,
   input logic XInfE, YInfE,
   input logic XNaNE, YNaNE,
   input logic XSNaNE, YSNaNE,
   output logic [63:0]  FAddResM,	   // Result of operation
   output logic [4:0]   FAddFlgM);   	// IEEE exception flags 
@ -44,53 +57,53 @@ module faddcvt(
   logic [3:0] 	AddSelInvE, AddSelInvM;
   logic [10:0] 	AddExpPostSumE,AddExpPostSumM;
   logic 		   AddCorrSignE, AddCorrSignM;
   logic          AddOp1NormE, AddOp1NormM;
   logic          AddOp2NormE, AddOp2NormM;
   logic          AddOpANormE,  AddOpANormM;
   logic          AddOpBNormE, AddOpBNormM;
   logic          AddInvalidE, AddInvalidM;
   logic 		   AddDenormInE, AddDenormInM;
   logic          AddSwapE, AddSwapM;
   logic          AddSignAE, AddSignAM;
   logic 		   AddConvertE, AddConvertM;
   logic [63:0] 	AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
   logic [11:0] 	AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
   logic [10:0] 	AddExponentE, AddExponentM;
-   fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, 
+   fpuaddcvt1 fpadd1 (.FOpCtrlE, .FmtE, .AddExponentE, 
                     .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, 
-                     .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, 
+   .XSgnE, .YSgnE,.XManE, .YManE, .XExpE, .YExpE,  .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
-                     .AddDenormInE, .AddConvertE, .AddSwapE);
+                     .AddCorrSignE, .AddSignAE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, 
                     .AddDenormInE, .AddSwapE);
   // E/M pipeline registers
   flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); 
   flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); 
   flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); 
   flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); 
   flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); 
   flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); 
   flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); 
   flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
-   flopenrc #(14) EMRegAdd9(clk, reset, FlushM, ~StallM, 
+   flopenrc #(11) EMRegAdd9(clk, reset, FlushM, ~StallM, 
-                           {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddSignAE},
+                           {AddSelInvE, AddCorrSignE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddSwapE, AddSignAE},
-                           {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM}); 
+                           {AddSelInvM, AddCorrSignM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddSwapM, AddSignAM}); 
-   fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, 
+   fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM,  .XNormM, .YNormM, 
-                     .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, 
+                     .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
-                     .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, 
+                     .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, 
-                     .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
+                     .AddSignAM, .AddCorrSignM, .AddSwapM, .FAddResM, .FAddFlgM);
 endmodule
 module fpuaddcvt1 (
-   input logic [63:0]   FSrcXE,		// 1st input operand (A)
+   input logic [2:0]	   FOpCtrlE,	// Function opcode
   input logic [63:0]   FSrcYE,		// 2nd input operand (B)
   input logic [3:0]	   FOpCtrlE,	// Function opcode
   input logic 	      FmtE,   		// Result Precision (1 for double, 0 for single)
   input logic XSgnE, YSgnE,
   input logic [10:0] XExpE, YExpE,
   input logic [52:0] XManE, YManE,
   input logic XDenormE, YDenormE,
   input logic XNormE, YNormE,
   input logic XZeroE, YZeroE,
   input logic XInfE, YInfE,
   input logic XNaNE, YNaNE,
   input logic XSNaNE, YSNaNE,
   output logic [63:0] 	AddFloat1E, 
   output logic [63:0] 	AddFloat2E,
   output logic [10:0] 	AddExponentE,
   output logic [10:0]	AddExpPostSumE,
   output logic [11:0]  AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0]
@ -98,11 +111,9 @@ module fpuaddcvt1 (
   output logic [3:0]   AddSelInvE,
   output logic         AddCorrSignE,
   output logic 	      AddSignAE,
   output logic	      AddOp1NormE, AddOp2NormE,
   output logic	      AddOpANormE, AddOpBNormE,
   output logic	      AddInvalidE,
   output logic 	      AddDenormInE,
   output logic 	      AddConvertE,
   output logic         AddSwapE
   );
@ -112,7 +123,7 @@ module fpuaddcvt1 (
   wire		    ZV_mantissaB;
   wire          P;
-   assign P = ~FmtE;
+   assign P = ~(FmtE^FOpCtrlE[1]);
   wire [63:0] IntValue;
   wire [11:0] exp1, exp2;
@ -130,22 +141,15 @@ module fpuaddcvt1 (
   wire 	      zeroB;
   wire [5:0]	align_shift;
   // Convert the input operands to their appropriate forms based on 
   // the original operands, the FOpCtrlE, and their precision P. 
   // Single precision inputs are converted to double precision 
   // and the sign of the first operand is set appropriately based on
   // if the operation is absolute value or negation. 
   convert_inputs conv1 (.Float1(AddFloat1E), .Float2(AddFloat2E), .op1(FSrcXE), .op2(FSrcYE), .op_type(FOpCtrlE), .P);
   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" Input Flags. The "AddSelInvE" is used in
   // the third pipeline stage to select the result. Also, AddOp1NormE
   // and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
   // sub is one if the effective operation is subtraction. 
-   exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, 
+   exception exc1 (.Ztype(AddSelInvE), .Invalid(AddInvalidE), .Denorm(AddDenormInE), .Sub(sub), 
-		   AddFloat1E, AddFloat2E, FOpCtrlE);
+   .XSgnE, .YSgnE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
 	.op_type(FOpCtrlE));
   // Perform Exponent Subtraction (used for alignment). For performance
   // both exponent subtractions are performed in parallel. This was 
@ -153,25 +157,25 @@ module fpuaddcvt1 (
   // the two parallel additions. The input values are zero-extended to 12 
   // bits prior to performing the addition. 
-   assign exp1 = {1'b0, AddFloat1E[62:52]};
+   assign exp1 = {1'b0, XExpE};
-   assign exp2 = {1'b0, AddFloat2E[62:52]};
+   assign exp2 = {1'b0, YExpE};
   assign exp_diff1 = exp1 - exp2;
-   assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
+   assign exp_diff2 = AddDenormInE ? ({YSgnE, YExpE} - {XSgnE, XExpE}): exp2 - exp1;
   // The second operand (B) should be set to zero, if FOpCtrlE does not
   // specify addition or subtraction
-   assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
+   assign zeroB = FOpCtrlE[1];
   // Swapped operands if zeroB is not one and exp1 < exp2. 
   // Swapping causes exp2 to be used for the result exponent. 
   // Only the exponent of the larger operand is used to determine
   // the final result. 
   assign AddSwapE = exp_diff1[11] & ~zeroB;
-   assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
+   assign AddExponentE = AddSwapE ? YExpE : XExpE;
-   assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
+   assign AddExpPostSumE = AddSwapE ? YExpE : XExpE;
-   assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
+   assign mantissaA = AddSwapE ? YManE[51:0] : XManE[51:0];
-   assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
+   assign mantissaB = AddSwapE ? XManE[51:0] : YManE[51:0];
-   assign AddSignAE     = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];   
+   assign AddSignAE     = AddSwapE ? YSgnE : XSgnE;   
   // Leading-Zero Detector. Determine the size of the shift needed for
   // normalization. If sum_corrected is all zeros, the exp_valid is 
@ -201,8 +205,8 @@ module fpuaddcvt1 (
   // and loss of sign information. The two bits to the right of the 
   // original mantissa form the "guard" and "round" bits that are used
   // to round the result. 
-   assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
+   assign AddOpANormE = AddSwapE ? YNormE : XNormE;
-   assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
+   assign AddOpBNormE = AddSwapE ? XNormE : YNormE;
   assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
   assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
@ -223,19 +227,18 @@ module fpuaddcvt1 (
   // and the exponent value is left unchanged. 
   // Under denormalized cases, the exponent before the rounder is set to 1
   // if the normal shift value is 11.
-   assign AddConvertE       = ~FOpCtrlE[2] & FOpCtrlE[1];
+   assign mantissaA3    = AddDenormInE ? ({12'h0, mantissaA}) : {mantissaA1, 7'h0};
   assign mantissaA3    = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
   // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to 
   // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six  
   // zeros. 
-   assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
+   assign mantissaB3[63:7] = AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}};
-   assign mantissaB3[6]    = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
+   assign mantissaB3[6]    = AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB;
-   assign mantissaB3[5:0]  = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
+   assign mantissaB3[5:0]  = AddDenormInE ? mantissaB[5:0] : 6'h0;
   // The sign of the result needs to be corrected if the true
   // operation is subtraction and the input operands were swapped. 
-   assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
+   assign AddCorrSignE = ~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
   // 64-bit Mantissa Adder/Subtractor
   cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
@ -281,31 +284,31 @@ endmodule // fpadd
 module fpuaddcvt2 (
-   input [2:0] 	FrmM,		// Rounding mode - specify values 
+   input logic [2:0] 	FrmM,		// Rounding mode - specify values 
-   input [3:0]	FOpCtrlM,	// Function opcode
+   input logic [2:0]	FOpCtrlM,	// Function opcode
-   input 	FmtM,   		// Result Precision (0 for double, 1 for single)
+   input logic 	FmtM,   		// Result Precision (0 for double, 1 for single)
-   input [63:0] AddSumM, AddSumTcM,
+   input logic [63:0] AddSumM, AddSumTcM,
-   input [63:0] 	 AddFloat1M, 
+   input logic [11:0]	 AddExp1DenormM, AddExp2DenormM,
-   input [63:0] 	 AddFloat2M,
+   input logic [10:0] 	 AddExponentM, AddExpPostSumM,
-   input [11:0]	 AddExp1DenormM, AddExp2DenormM,
+   input logic [3:0] 	 AddSelInvM,
-   input [10:0] 	 AddExponentM, AddExpPostSumM,
+   input logic XSgnM, YSgnM,
-   input [3:0] 	 AddSelInvM,
+   input logic [52:0] XManM, YManM,
-   input		 AddOp1NormM, AddOp2NormM,
+   input logic [10:0] XExpM, YExpM,
-   input		 AddOpANormM, AddOpBNormM,
+   input logic XNormM, YNormM,
-   input		 AddInvalidM,
+   input logic		 AddOpANormM, AddOpBNormM,
-   input 	 AddDenormInM, 
+   input logic		 AddInvalidM,
-   input 	 AddSignAM, 
+   input logic 	 AddDenormInM, 
-   input         AddCorrSignM,
+   input logic 	 AddSignAM, 
-   input 	 AddConvertM,
+   input logic         AddCorrSignM,
-   input          AddSwapM,
+   input logic          AddSwapM,
-   output [63:0] FAddResM,	// Result of operation
+   output logic [63:0] FAddResM,	// Result of operation
-   output [4:0]  FAddFlgM   	// IEEE exception flags 
+   output logic [4:0]  FAddFlgM   	// IEEE exception flags 
 );
   wire 	 AddDenormM;   	// AddDenormM on input or output   
   wire          P;
-   assign P = ~FmtM;
+   assign P = ~(FmtM^FOpCtrlM[1]);
   wire [10:0]   exp_pre;
   wire [63:0] 	 Result;   
@ -338,15 +341,15 @@ module fpuaddcvt2 (
   //cases/conversion cases
   assign exp_pre       = AddDenormInM ?
                          ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
-                          : (AddConvertM ? 11'b10000111100 : AddExponentM);
+                          : AddExponentM;
   // Finds normal underflow result to determine whether to round final AddExponentM down
   // Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor
-   assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
+   assign Float1_sum_comp = ~(XManM[51:0] > AddSumM[51:0]);
-   assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
+   assign Float2_sum_comp = ~(YManM[51:0] > AddSumM[51:0]);
-   assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
+   assign Float1_sum_tc_comp = ~(XManM[51:0] > AddSumTcM[51:0]);
-   assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
+   assign Float2_sum_tc_comp = ~(YManM[51:0] > AddSumTcM[51:0]);
   // Determines the correct Float value to compare based on AddSwapM result
   assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
@ -357,16 +360,16 @@ module fpuaddcvt2 (
   // If the signs are different and both operands aren't denormalized
   // the normal underflow bit is needed and therefore updated.
-   assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
+   assign normal_underflow = ((XSgnM ^ YSgnM) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
   // Determine the correct sign of the result
-   assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];   
+   assign sign_corr = (AddCorrSignM ^ AddSignAM) ^ AddSumM[63];   
   // If the AddSumM is negative, use its two complement instead. 
   // This value has to be 64-bits to correctly handle the 
   // case 10...00
-   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
+   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (XSgnM ~^ YSgnM) & FOpCtrlM[0] ) | ((XSgnM ^ YSgnM) & ~FOpCtrlM[0]) ))
-			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
+			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : (AddSumM[63] ? AddSumTcM : AddSumM);
   // Finds normal underflow result to determine whether to round final AddExponentM down
   //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
@ -384,7 +387,7 @@ module fpuaddcvt2 (
   // be right shifted. It outputs the normalized AddSumM. 
   barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
-   assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
+   assign sum_norm_w_bypass = sum_norm;
   // Round the mantissa to a 52-bit value, with the leading one
   // removed. If the result is a single precision number, the actual 
@ -397,10 +400,10 @@ module fpuaddcvt2 (
   // help in processor reservation station detection of load/stores. In
   // other words, the processor would like to know ahead of time that
   // if the result is an exception then don't load or store.
-   rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid, 
+   rounder round1 (.Result, .DenormIO, .Flags(FlagsIn), .rm(FrmM), .P, .OvEn(AddOvEnM), .UnEn(AddUnEnM), .exp_valid, 
-		   AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
+		   .sel_inv(AddSelInvM), .Invalid(AddInvalidM), .DenormIn(AddDenormInM), .Asign(sign_corr), .Aexp(exp_pre), .norm_shift, .A(sum_norm_w_bypass),
-		   AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
+		   .exponent_postsum(AddExpPostSumM), .A_Norm(XNormM), .B_Norm(YNormM), .exp_A_unmodified({XSgnM, XExpM}), .exp_B_unmodified({YSgnM, YExpM}),
-		   AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
+		   .normal_overflow(AddNormOvflowM), .normal_underflow, .swap(AddSwapM), .op_type(FOpCtrlM), .sum(AddSumM));
   // Store the final result and the exception flags in registers.
   assign FAddResM = Result;
--- a/wally-pipelined/src/fpu/fctrl.sv
+++ b/wally-pipelined/src/fpu/fctrl.sv
@ -9,7 +9,7 @@ module fctrl (
  output logic       FRegWriteD,  // FP register write enable
  output logic       FDivStartD,  // Start division or squareroot
  output logic [2:0] FResultSelD, // select result to be written to fp register
-  output logic [3:0] FOpCtrlD,    // chooses which operation to do - specifics shown at bottom of module and in each unit
+  output logic [2:0] FOpCtrlD,    // chooses which operation to do - specifics shown at bottom of module and in each unit
  output logic [1:0] FResSelD,    // select one of the results done in the memory stage
  output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
  output logic       FmtD,        // precision - single-0 double-1
@ -24,82 +24,82 @@ module fctrl (
    case(OpD)
    // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
      7'b0000111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw
+                    3'b010:  ControlsD = `FCTRLW'b1_0_000_000_00_00_0_0; // flw
-                    3'b011:  ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld
+                    3'b011:  ControlsD = `FCTRLW'b1_0_000_001_00_00_0_0; // fld
-                    default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                    default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                  endcase
      7'b0100111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw
+                    3'b010:  ControlsD = `FCTRLW'b0_0_000_010_00_00_0_0; // fsw
-                    3'b011:  ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd
+                    3'b011:  ControlsD = `FCTRLW'b0_0_000_011_00_00_0_0; // fsd
-                    default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                    default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                  endcase
-      7'b1000011:   ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd
+      7'b1000011:   ControlsD = `FCTRLW'b1_0_001_000_00_00_0_0; // fmadd
-      7'b1000111:   ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub
+      7'b1000111:   ControlsD = `FCTRLW'b1_0_001_001_00_00_0_0; // fmsub
-      7'b1001011:   ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub
+      7'b1001011:   ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub
-      7'b1001111:   ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd
+      7'b1001111:   ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd
      7'b1010011: casez(Funct7D)
-                    7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd
+                    7'b00000??: ControlsD = `FCTRLW'b1_0_010_000_00_00_0_0; // fadd
-                    7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub
+                    7'b00001??: ControlsD = `FCTRLW'b1_0_010_001_00_00_0_0; // fsub
-                    7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul
+                    7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul
-                    7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv
+                    7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv
-                    7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt
+                    7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt
                    7'b00100??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_000_01_00_0_0; // fsgnj
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_001_01_00_0_0; // fsgnjn
-                                  3'b010:  ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx
+                                  3'b010:  ControlsD = `FCTRLW'b1_0_100_010_01_00_0_0; // fsgnjx
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b00101??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_100_111_10_00_0_0; // fmin
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_100_101_10_00_0_0; // fmax
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b10100??: case(Funct3D)
-                                  3'b010:  ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq
+                                  3'b010:  ControlsD = `FCTRLW'b0_1_100_010_00_00_0_0; // feq
-                                  3'b001:  ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt
+                                  3'b001:  ControlsD = `FCTRLW'b0_1_100_001_00_00_0_0; // flt
-                                  3'b000:  ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle
+                                  3'b000:  ControlsD = `FCTRLW'b0_1_100_011_00_00_0_0; // fle
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b11100??: if (Funct3D == 3'b001)
-                                  ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass
+                                  ControlsD = `FCTRLW'b0_1_100_000_00_10_0_0; // fclass
-                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
+                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_100_00_01_0_0; // fmv.x.w
-                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
+                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_101_00_01_0_0; // fmv.x.d
-                                else                            ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                else                            ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                    7'b1101000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.s.w
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.s.w
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.s.wu
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.s.wu
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.s.l
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.s.l
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.s.lu
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.s.lu
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b1100000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.s
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.s
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.s
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.s
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.s
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.s
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.s
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.s
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                endcase
-                    7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
+                    7'b1111000: ControlsD = `FCTRLW'b1_0_100_000_00_00_0_0; // fmv.w.x
-                    7'b0100000: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.s.d
+                    7'b0100000: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.s.d
                    7'b1101001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.d.w
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.d.w
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.d.wu
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.d.wu
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.d.l
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.d.l
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.d.lu
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.d.lu
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b1100001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.d
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.d
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.d
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.d
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.d
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.d
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.d
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.d
-                                  default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                                  default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                                endcase
-                    7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
+                    7'b1111001: ControlsD = `FCTRLW'b1_0_100_001_00_00_0_0; // fmv.d.x
-                    7'b0100001: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.d.s
+                    7'b0100001: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.d.s
-                    default:    ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+                    default:    ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
                  endcase
-      default:      ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
+      default:      ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction
    endcase
  // unswizzle control bits
@ -117,7 +117,7 @@ module fctrl (
  // Precision
  //    0-single
  //    1-double
-  assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
+  assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : FResultSelD == 3'b010 ? Funct7D[0]^FOpCtrlD[1] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
  // FResultSel:
  //    000 - ReadRes - load
--- a/wally-pipelined/src/fpu/fcvt.sv
+++ b/wally-pipelined/src/fpu/fcvt.sv
@ -11,7 +11,7 @@ module fcvt (
    input logic             XDenormE,   // is X denormalized
    input logic [10:0]      BiasE,      // bias - depends on precision (max exponent/2)
    input logic [`XLEN-1:0] SrcAE,      // integer input
-    input logic [3:0]       FOpCtrlE,   // chooses which instruction is done (full list below)
+    input logic [2:0]       FOpCtrlE,   // chooses which instruction is done (full list below)
    input logic [2:0]       FrmE,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic             FmtE,       // precision 1 = double 0 = single
    output logic [63:0]     CvtResE,    // convert final result
@ -43,27 +43,27 @@ module fcvt (
    logic               RoundSgn;           // sign of the rounded result
    // FOpCtrlE:
-      //  fcvt.w.s  = 0010
+      //  fcvt.w.s  = 001
-      //  fcvt.wu.s = 0110
+      //  fcvt.wu.s = 011
-      //  fcvt.s.w  = 0001
+      //  fcvt.s.w  = 000
-      //  fcvt.s.wu = 0101
+      //  fcvt.s.wu = 010
-      //  fcvt.l.s  = 1010
+      //  fcvt.l.s  = 101
-      //  fcvt.lu.s = 1110
+      //  fcvt.lu.s = 111
-      //  fcvt.s.l  = 1001
+      //  fcvt.s.l  = 100
-      //  fcvt.s.lu = 1101
+      //  fcvt.s.lu = 110
-      //  fcvt.w.d  = 0010 
+      //  fcvt.w.d  = 001 
-      //  fcvt.wu.d = 0110
+      //  fcvt.wu.d = 011
-      //  fcvt.d.w  = 0001
+      //  fcvt.d.w  = 000
-      //  fcvt.d.wu = 0101
+      //  fcvt.d.wu = 010
-      //  fcvt.l.d  = 1010
+      //  fcvt.l.d  = 101
-      //  fcvt.lu.d = 1110
+      //  fcvt.lu.d = 111
-      //  fcvt.d.l  = 1001
+      //  fcvt.d.l  = 100
-      //  fcvt.d.lu = 1101
+      //  fcvt.d.lu = 110
      //  {long, unsigned, to int}
    // calculate signals based off the input and output's size
-    assign Res64 = (FOpCtrlE[1]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[0]);
+    assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]);
-    assign In64 =  (FOpCtrlE[0]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[1]);
+    assign In64 =  (~FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&FOpCtrlE[0]);
    assign SubBits = In64 ? 8'd64 : 8'd32;
    assign Bits = Res64 ? 8'd64 : 8'd32;
@ -73,11 +73,11 @@ module fcvt (
 ////////////////////////////////////////////////////////
    // position the input in the most significant bits
-    assign IntIn = FOpCtrlE[3] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0};
+    assign IntIn = FOpCtrlE[2] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0};
    // make the integer positive
-    assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
+    assign PosInt = IntIn[64-1]&~FOpCtrlE[1] ? -IntIn : IntIn;
    // determine the integer's sign
-    assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
+    assign ResSgn = ~FOpCtrlE[1] ? IntIn[64-1] : 1'b0;
 	// Leading one detector
 	logic [8:0]	i;
@ -97,8 +97,8 @@ module fcvt (
    // select the shift value and amount based on operation (to fp or int)
-    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
+    assign ShiftCnt = FOpCtrlE[0] ? ExpVal : LZResP;
-    assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XManE} : {PosInt, 52'b0};
+    assign ShiftVal = FOpCtrlE[0] ? {{64-2{1'b0}}, XManE} : {PosInt, 52'b0};
 	// if shift = -1 then shift one bit right for gaurd bit (right shifting twice never rounds)
 	// if the shift is negitive add a bit for sticky bit calculation
@ -111,35 +111,35 @@ module fcvt (
    // calculate sticky bit 
    //  - take into account the possible right shift from before
    //  - the sticky bit calculation covers three different sizes depending on the operation
-    assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
+    assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (~FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (~FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
    // determine guard, round, and least significant bit of the result
-    assign Guard = FOpCtrlE[1] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
+    assign Guard = FOpCtrlE[0] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
-    assign Round = FOpCtrlE[1] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
+    assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
-    assign LSB = FOpCtrlE[1] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
+    assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
    always_comb begin
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
+            3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[0]) | (ResSgn&~FOpCtrlE[0]);//round down
-            3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
+            3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[0]) | (~ResSgn&~FOpCtrlE[0]);//round up
            3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
    end
    // don't round if the result is exact
-    assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]);
+    assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[0]);
    // round the shifted mantissa
    assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
    assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 :  {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
    // fit the rounded result into the appropriate size and take the 2's complement if needed
-     assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : 
+     assign Rounded = Res64 ? XSgnE&FOpCtrlE[0] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : 
 			      XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
    // extract the MSB and Sign for later use (will be used to determine underflow and overflow)
@ -148,29 +148,29 @@ module fcvt (
    // check if the result overflows
-    assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;
+    assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[1]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;
    // check if the result underflows (this calculation changes if the result is signed or unsigned)
-    assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]);    // assign CvtIntRes =  (XSgnE | ShiftCnt[12]) ? {64{1'b0}}  : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
+    assign Uf = FOpCtrlE[1] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]);    // assign CvtIntRes =  (XSgnE | ShiftCnt[12]) ? {64{1'b0}}  : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
    // calculate the result's sign
-    assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
+    assign SgnRes = ~FOpCtrlE[2] & FOpCtrlE[0];
    // select the integer result
-    assign CvtIntRes = Of ? FOpCtrlE[2] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
+    assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
-                    Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
+                    Uf ? FOpCtrlE[1] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
 		            Rounded[64-1:0];
    // select the floating point result            
    assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]};
    // select the result
-    assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
+    assign CvtResE = ~FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
    // calculate the flags
    //      - to int only sets the invalid flag
    //      - from int only sets the inexact flag
-    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};
+    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]};
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@ -23,7 +23,7 @@
 ///////////////////////////////////////////
 `include "wally-config.vh"
-// `include "../../../config/rv64icfd/wally-config.vh"
+//  `include "../../../config/rv64icfd/wally-config.vh"
 module fma(
    input logic                 clk,
@ -106,6 +106,7 @@ module fma1(
    logic [`NE+1:0]     AlignCnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
    logic [4*`NF+5:0]   ZManShifted;        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
    logic [4*`NF+5:0]   ZManPreShifted;     // input to the alignment shifter U(NF+5.3NF+1)
    logic [`NE-2:0]     Denorm;             // Denormalized input value
    ///////////////////////////////////////////////////////////////////////////////
    // Calculate the product
@ -116,8 +117,9 @@ module fma1(
    ///////////////////////////////////////////////////////////////////////////////
    // verilator lint_off WIDTH
    assign Denorm = FmtE ? 1 : -126+1023;
    assign ProdExpE = (XZeroE|YZeroE) ? 0 :
-                 XExpE + YExpE - BiasE + XDenormE + YDenormE;
+                 XExpE + YExpE - BiasE + ({`NE-1{XDenormE}}&Denorm) + ({`NE-1{YDenormE}}&Denorm);
    // verilator lint_on WIDTH
    // Calculate the product's mantissa
@ -133,7 +135,7 @@ module fma1(
    //      - positive means the product is larger, so shift Z right
    //      - Denormal numbers have an exponent value of 1; however, they are
    //        represented with an exponent of 0. Add one to the exponent if it is a denormal number
-    assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
+    assign AlignCnt = ProdExpE - (ZExpE + ({`NE-1{ZDenormE}}&Denorm));
    // Default addition without shifting
    //          |   54'b0    |  106'b(product)  | 2'b0 |
@ -320,7 +322,9 @@ module fma2(
    //assign FracLen = `NF;
    // Determine if the result is denormal
-    assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
+    logic [`NE+1:0] SumExpTmpTmp;
    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
    assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
@ -511,7 +515,7 @@ module fma2(
                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
    assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
-    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
+    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + (CalcPlus1&(AddendStickyM|FrmM[1])) : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
    assign FMAResM = XNaNM ? XNaNResult :
                        YNaNM ? YNaNResult :
--- a/wally-pipelined/src/fpu/fpdiv.sv
+++ b/wally-pipelined/src/fpu/fpdiv.sv
@ -75,15 +75,8 @@ module fpdiv (
   // div/sqrt
         //  fdiv  = 0
         //  fsqrt = 1
-
+   assign Float1 = op1;
-   // Convert the input operands to their appropriate forms based on 
+   assign Float2 = op_type ? op1 : op2;   
   // the orignal operands, the op_type , and their precision P. 
   // Single precision inputs are converted to double precision 
   // and the sign of the first operand is set appropratiately based on
   // if the operation is absolute value or negation.   
   convert_inputs_div conv1 (.op1, .op2, .op_type, .P, 
                           // outputs:
                           .Float1, .Float2b(Float2));
   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" Input Flags. The "sel_inv" is used in
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -57,7 +57,7 @@ module fpu (
  //                single stored in a double: | 32 1s | single precision value |
  //    - sets the underflow after rounding
-  generate if (`F_SUPPORTED | `D_SUPPORTED) begin 
+  generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu
  // control signals
 	logic 		  FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
@ -67,7 +67,7 @@ module fpu (
 	logic 		  FWriteIntD;                         // Write to integer register
 	logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
 	logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register
-	logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM;           // Select which opperation to do in each component
+	logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM;           // Select which opperation to do in each component
 	logic [1:0] FResSelD, FResSelE, FResSelM;           // Select one of the results that finish in the memory stage
 	logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;  // Select the result written to the integer resister
 	logic [4:0] Adr1E, Adr2E, Adr3E;                    // adresses of each input
@ -97,7 +97,8 @@ module fpu (
 	logic 		   XInfE, YInfE, ZInfE;           // is the input infinity - execute stage
 	logic 		   XInfM, YInfM, ZInfM;           // is the input infinity - memory stage
 	logic 		   XExpMaxE;                      // is the exponent all ones (max value)
-	logic 		   XNormE;                        // is X normal
+	logic 		   XNormE,YNormE;                 // is normal
 	logic 		   XNormM,YNormM;                 // is normal
 	// result and flag signals
@ -171,7 +172,7 @@ module fpu (
 	flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
 	flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                                                       {Adr1E,         Adr2E,         Adr3E});
-	flopenrc #(18) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+	flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, 
 				  {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
 				  {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
@ -203,11 +204,11 @@ module fpu (
  // unpacking unit
  //    - splits FP inputs into their various parts
  //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
-	unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, 
+	unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FResultSelE, .FmtE, 
                      // outputs:
                      .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
                      .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
-                      .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
+                      .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE, .YNormE);
  // FMA
  //    - two stage FMA
@ -222,7 +223,7 @@ module fpu (
 		 .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
     .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
     .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-		 .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), 
+		 .FOpCtrlE, .FOpCtrlM, 
 		 .FmtE, .FmtM, .FrmM, 
     // outputs:
     .FMAFlgM, .FMAResM);
@ -240,10 +241,10 @@ module fpu (
  //    - if not captured, any forwarded inputs will change during computation
  //        - this problem is caused by stalling the execute stage
  //    - the other units don't have this problem, only div/sqrt stalls the execute stage
-	flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
+	flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
 				   .en(1'b1), .clear(FDivSqrtDoneE),
 				   .reset(reset),  .clk(FDivBusyE));
-	flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
+	flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
 				   .en(1'b1), .clear(FDivSqrtDoneE),
 				   .reset(reset),  .clk(FDivBusyE));
@ -261,6 +262,8 @@ module fpu (
  //*** remove unneeded logic
  //*** change to use the unpacking unit if possible
 	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
   .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
   .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
                  // outputs:
                  .FAddResM, .FAddFlgM);
@ -269,7 +272,7 @@ module fpu (
  //    - writes to FP file during min/max instructions
  //    - other comparisons write a 1 or 0 to the integer register
 	fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), 
-            .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), 
+            .FSrcXE, .FSrcYE, .FOpCtrlE, 
            .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, 
            // outputs:
 		        .Invalid(CmpNVE), .CmpResE);
@ -325,9 +328,9 @@ module fpu (
 	flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
-	flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
+	flopenrc #(18) EMCtrlReg(clk, reset, FlushM, ~StallM,
-				 {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
+				 {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE},
-				 {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
+				 {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM});
--- a/wally-pipelined/src/fpu/rounder_denorm.sv
+++ b/wally-pipelined/src/fpu/rounder_denorm.sv
@ -1,4 +1,4 @@
 // The rounder takes as inputs a 64-bit value to be rounded, A, the 
 // exponent of the value to be rounded, the sign of the final result, Sign, 
 // the precision of the results, P, and the two-bit rounding mode, rm. 
 // It produces a rounded 52-bit result, Z, the exponent of the rounded 
@ -17,38 +17,34 @@
 // where , denotes the rounding boundary. S is the logical OR of all the
 // bits to the right of R. 
-module rounder (Result, DenormIO, Flags, rm, P, OvEn, 
+module rounder (
-		UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp, 
+   input logic  [2:0]  rm,
-		norm_shift, A, exponent_postsum, A_Norm, B_Norm, exp_A_unmodified, exp_B_unmodified,
+   input logic         P,
-		normal_overflow, normal_underflow, swap, op_type, sum);
+   input logic         OvEn,
-
+   input logic         UnEn,
-   input  [2:0]  rm;
+   input logic         exp_valid,
-   input         P;
+   input logic [3:0] 	 sel_inv,
-   input         OvEn;
+   input logic	 Invalid,
-   input         UnEn;
+   input logic	 DenormIn,
-   input         exp_valid;
+   input logic         Asign,
-   input [3:0] 	 sel_inv;
+   input logic [10:0]  Aexp,
-   input	 Invalid;
+   input logic [5:0] 	 norm_shift,
-   input	 DenormIn;
+   input logic [63:0]  A,
-   input         convert;
+   input logic [10:0]  exponent_postsum,
-   input         Asign;
+   input logic 	 A_Norm,
-   input [10:0]  Aexp;
+   input logic 	 B_Norm,
-   input [5:0] 	 norm_shift;
+   input logic [11:0]  exp_A_unmodified,
-   input [63:0]  A;
+   input logic [11:0]  exp_B_unmodified,
-   input [10:0]  exponent_postsum;
+   input logic 	 normal_overflow,
-   input 	 A_Norm;
+   input logic 	 normal_underflow,
-   input 	 B_Norm;
+   input logic 	 swap,
-   input [11:0]  exp_A_unmodified;
+   input logic [2:0]	 op_type,
-   input [11:0]  exp_B_unmodified;
+   input logic [63:0]  sum,
   input 	 normal_overflow;
   input 	 normal_underflow;
   input 	 swap;
   input [3:0]	 op_type;
   input [63:0]  sum;
-   output [63:0] Result;
+   output logic [63:0] Result,
-   output 	 DenormIO;
+   output logic 	 DenormIO,
-   output [4:0]  Flags;
+   output logic [4:0]  Flags
 );
   wire          Rsign;
   wire 	 Sticky_out;
@ -87,7 +83,6 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
   wire 	 Cout_overflow;
   wire		 Texp_l7z;
   wire		 Texp_l7o;
   wire		 OvCon;
   // Determine the sticky bits for double and single precision
   assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0];
@ -152,7 +147,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
   assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z));
   // Set the overflow and underflow flags. They should not be set if
-   // the input was infinite or NaN or the output of the adder is zero.
+   // the input was infinite or NaN or the output of the adder is zero.
   // 00 = Valid
   // 10 = NaN
   assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
@ -164,7 +159,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
   assign OverFlow  = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid;
  // The DenormIO is set if underflow has occurred or if there was a
-   // denormalized input. 
+   // denormalized input. 
   assign DenormIO = DenormIn | UnderFlow;
   // The final result is Inexact if any rounding occurred ((i.e., R or S 
@ -192,7 +187,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
   // -0 + +0 = -0 (for RD) 
   assign Rzero = ~exp_valid | UnderFlow;
   assign Rsign = DenormIn ?
-		  ( ~(op_type[2] | op_type[1] | op_type[0]) ? 
+		  ( ~(op_type[1] | op_type[0]) ? 
 		  ( (sum[63] & (A_Norm | B_Norm) & (exp_A_unmodified[11] ^ exp_B_unmodified[11])) ?
 		  ~Asign : Asign) 
   		  : ( ((A_Norm ^ B_Norm) & (exp_A_unmodified[11] ~^ exp_B_unmodified[11])) ?
@ -202,7 +197,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
     	          (sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] |
 	          sel_inv[2]&sel_inv[1]&~sel_inv[0] |		  
 	          ~exp_valid&rm[1]&rm[0]&~sel_inv[2] | 
-	          UnderFlow&rm[1]&rm[0]) & ~convert) & ~sel_inv[3]) |
+	          UnderFlow&rm[1]&rm[0])) & ~sel_inv[3]) |
 		  (Asign & sel_inv[3]) );
   // The exponent of the final result is zero if the final result is 
@ -218,7 +213,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
   assign VeryLarge = OverFlow & ~OvEn;
   assign Infinite   = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]);
   assign Largest = VeryLarge & Round_zero;
-   assign Adj_exp = OverFlow & OvEn & ~convert;
+   assign Adj_exp = OverFlow & OvEn;
   assign Rexp[10:1] = ({10{~Valid}} | 
 			{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], 
 			 (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | 
@ -230,7 +225,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
  // Depending on the operation and the signs of the original operands,
   // underflow may or may not be needed to round.
   assign Rexp_denorm = DenormIn ? 
-			((~op_type[2] & ~op_type[1] & op_type[0]) ? 
+			((~op_type[1] & op_type[0]) ? 
 				( ((A_Norm != B_Norm) & (exp_A_unmodified[11] == exp_B_unmodified[11])) ? 
 					( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) 
 					: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) 
@ -238,7 +233,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
 					( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) 
 					: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) 
 				) : 
-			(op_type[3]) ? exp_A_unmodified[10:0] : Rexp; //KEP used to be all of exp_A_unmodified
+			Rexp; //KEP used to be all of exp_A_unmodified
   // If the result is zero or infinity, the mantissa is all zeros. 
   // If the result is NaN, the mantissa is 10...0
@ -256,10 +251,9 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
   // for the final result. A double precision result is returned if 
   // overflow has occurred, the overflow trap is enabled, and a conversion
   // is being performed. 
   assign OvCon = OverFlow & OvEn & convert;
-   assign Result = (op_type[3]) ? {A[63:0]} : (DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : ((P&~OvCon) ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]}
+   assign Result = DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : (P ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]}
-	           : {Rsign, Rexp, Rmant}));
+	           : {Rsign, Rexp, Rmant});
 endmodule // rounder
--- a/wally-pipelined/src/fpu/unpacking.sv
+++ b/wally-pipelined/src/fpu/unpacking.sv
@ -1,11 +1,12 @@
 module unpacking ( 
    input logic  [63:0] X, Y, Z,
    input logic         FmtE,
    input logic  [2:0]  FResultSelE,
    input logic  [2:0]  FOpCtrlE,
    output logic        XSgnE, YSgnE, ZSgnE,
    output logic [10:0] XExpE, YExpE, ZExpE,
    output logic [52:0] XManE, YManE, ZManE,
-    output logic XNormE,
+    output logic XNormE, YNormE,
    output logic XNaNE, YNaNE, ZNaNE,
    output logic XSNaNE, YSNaNE, ZSNaNE,
    output logic XDenormE, YDenormE, ZDenormE,
@ -25,12 +26,9 @@ module unpacking (
    assign YSgnE = FmtE ? Y[63] : Y[31];
    assign ZSgnE = FmtE ? Z[63] : Z[31];
-    assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
+    assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
-    assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
+    assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
-    assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};//{Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]}; 
+    assign ZExpE = FmtE ? Z[62:52] : {Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]}; 
 /*    assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; // *** maybe convert to full number of bits here?
    assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};
    assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};*/
    assign XFracE = FmtE ? X[51:0] : {X[22:0], 29'b0};
    assign YFracE = FmtE ? Y[51:0] : {Y[22:0], 29'b0};
@ -57,6 +55,7 @@ module unpacking (
    assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23];
    assign XNormE = ~(XExpMaxE|XExpZero);
    assign YNormE = ~YExpZero; // only used in addcvt - checks inf and NaN separately
    assign XNaNE = XExpMaxE & ~XFracZero;
    assign YNaNE = YExpMaxE & ~YFracZero;
@ -78,7 +77,6 @@ module unpacking (
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;
-    assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed?
+    assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
    // assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision
 endmodule
`@ -1,3 +1,3 @@`
	`testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat`	`testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat`
	`tr -d ' ' < testFloat > testFloatNoSpace`	`tr -d ' ' < testFloat > testFloatNoSpace`