From fdfc0dbf460f8ed98789a9991965416138139015 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 4 Mar 2021 22:18:19 +0000
Subject: [PATCH] fixed various bugs

---
 wally-pipelined/src/fpu/FMA/add.v       |  6 +--
 wally-pipelined/src/fpu/FMA/align.v     | 32 ++++++------
 wally-pipelined/src/fpu/FMA/expgen.v    | 67 ++++++++++++++-----------
 wally-pipelined/src/fpu/FMA/flag.v      | 53 +++++++++----------
 wally-pipelined/src/fpu/FMA/fmac.v      |  4 +-
 wally-pipelined/src/fpu/FMA/normalize.v | 12 ++---
 wally-pipelined/src/fpu/FMA/round.v     | 61 +++++++++++-----------
 wally-pipelined/src/fpu/FMA/sign.v      | 17 ++++---
 wally-pipelined/src/fpu/FMA/special.v   | 30 +++++------
 9 files changed, 149 insertions(+), 133 deletions(-)

diff --git a/wally-pipelined/src/fpu/FMA/add.v b/wally-pipelined/src/fpu/FMA/add.v
index 9fc6a78d..65d20d6e 100644
--- a/wally-pipelined/src/fpu/FMA/add.v
+++ b/wally-pipelined/src/fpu/FMA/add.v
@@ -35,14 +35,14 @@ module add(r[105:0], s[105:0], t[157:0], sum[157:0],
 	wire		[157:0] 	sum0;			// sum of compound adder +0 mode
 	wire		[157:0] 	sum1;			// sum of compound adder +1 mode
 
-	// Invert addend if necessary 
+	// Invert addend if z's sign is different from the product's sign
 
 	assign t2 = invz ? -t : t;
 	
 	// Zero out product if Z >> product or product really should be zero
 
-	assign r2 = ~proddenorm & killprod ? 106'b0 : r;
-	assign s2 = ~proddenorm & killprod ? 106'b0 : s;
+	assign r2 = killprod ? 106'b0 : r;
+	assign s2 = killprod ? 106'b0 : s;
 
 	// Compound adder
 	// Consists of 3:2 CSA followed by long compound CPA
diff --git a/wally-pipelined/src/fpu/FMA/align.v b/wally-pipelined/src/fpu/FMA/align.v
index dd0c86f7..e70ef7f3 100644
--- a/wally-pipelined/src/fpu/FMA/align.v
+++ b/wally-pipelined/src/fpu/FMA/align.v
@@ -15,17 +15,17 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
              killprod,  bypsel[1], bypplus1, byppostnorm);
 /////////////////////////////////////////////////////////////////////////////
 
-	input 		[51:0]		z;				// Fraction of addend z;
+	input 		[51:0]		z;		// Fraction of addend z;
 	input 		[12:0]		ae;		// sign of exponent of addend z;
-	input 		[11:0]		aligncnt;		// amount to shift
-	input					xzero;			// Input X = 0
-	input                  	yzero;          // Input Y = 0 
-	input                  	zzero;          // Input Z = 0
-	input                  	zdenorm;        // Input Z = denorm
-	input			proddenorm;
+	input 		[11:0]		aligncnt;	// amount to shift
+	input				xzero;		// Input X = 0
+	input                  		yzero;          // Input Y = 0 
+	input                  		zzero;          // Input Z = 0
+	input                  		zdenorm;        // Input Z is denormalized
+	input				proddenorm;	// product is denormalized
 	input     	[1:1] 		bypsel;         // Select bypass to X or Z
-	input					bypplus1;		// Add one to bypassed result
-	input                  	byppostnorm;    // Postnormalize bypassed result 
+	input				bypplus1;	// Add one to bypassed result
+	input                  		byppostnorm;    // Postnormalize bypassed result 
 	output    	[157:0]    	t;              // aligned addend (54 bits left of bpt)
 	output          		bs;           	// sticky bit of addend
 	output          		ps;           	// sticky bit of product
@@ -34,13 +34,13 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 	// Internal nodes
  
 	reg       	[157:0]   	t;				// aligned addend from shifter
-	reg             		killprod;		// Z >> product 
+	reg             		killprod;			// Z >> product 
 	reg             		bs;				// sticky bit of addend
 	reg             		ps;				// sticky bit of product
 	reg       	[7:0]		i;				// temp storage for finding sticky bit
 	wire		[52:0]		z1;				// Z plus 1
 	wire		[51:0]		z2;				// Z selected after handling rounds
-	wire		[11:0]		align104;		// alignment count + 104
+	wire		[11:0]		align104;			// alignment count + 104
 
 	// Increment fraction of Z by  one if necessary for prerounded bypass
 	// This incrementor delay is masked by the alignment count computation
@@ -56,7 +56,7 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 	// addend on right shifts.  Handle special cases of shifting
 	// by too much.
 
-	always @(z2 or aligncnt or align104 or zzero or xzero or yzero or zdenorm)
+	always @(z2 or aligncnt or align104 or zzero or xzero or yzero or zdenorm or proddenorm)
 		begin
 
 		// Default to clearing sticky bits 
@@ -66,7 +66,7 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 		// And to using product as primary operand in adder I exponent gen 
 		killprod = 0;
 
-		if(zzero) begin 
+		if(zzero) begin // if z = 0
 			t = 158'b0;
 			if (xzero || yzero) killprod = 1;
 		end else if ((aligncnt > 53 && ~aligncnt[11]) || xzero || yzero) begin
@@ -75,8 +75,8 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 			t = {53'b0, ~zzero, z2, 52'b0}; 
 			killprod = 1;
 			ps = ~xzero && ~yzero; 
-		end else if ((ae[12] && align104[11])) begin //***fix the if statement
-			// KEP if the multiplier's exponent overflows
+		end else if ((ae[12] && align104[11]) && ~proddenorm) begin //***fix the if statement
+							// KEP if the multiplier's exponent overflows
 			t = {53'b0, ~zzero, z2, 52'b0}; 
 			killprod = 1;
 			ps = ~xzero && ~yzero; 
@@ -85,7 +85,7 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 			t = 0;
 		end else if (~aligncnt[11])  begin 	// Left shift by reasonable amount
 			t = {53'b0, ~zzero, z2, 52'b0} << aligncnt;
-		end else begin                 // Otherwise right shift 
+		end else begin                 		// Otherwise right shift 
 			t = {53'b0, ~zzero, z2, 52'b0} >> -aligncnt;
 
 		// use some behavioral code to find sticky bit.  This is really
diff --git a/wally-pipelined/src/fpu/FMA/expgen.v b/wally-pipelined/src/fpu/FMA/expgen.v
index db8c56b2..edeee96c 100644
--- a/wally-pipelined/src/fpu/FMA/expgen.v
+++ b/wally-pipelined/src/fpu/FMA/expgen.v
@@ -19,7 +19,7 @@ module expgen(x[62:52], y[62:52], z[62:52],
 			   earlyres[62:52], earlyressel, bypsel[1], byppostnorm, 
 			   killprod,  sumzero, postnormalize, normcnt, infinity, 
 			   invalid, overflow, underflow, inf, 
-			   nan, xnan, ynan, znan, zdenorm, specialsel, 
+			   nan, xnan, ynan, znan, zdenorm, proddenorm, specialsel, 
 			   aligncnt, w[62:52], wbypass[62:52],
 			   prodof, sumof, sumuf, denorm0, ae[12:0]);
 /////////////////////////////////////////////////////////////////////////////
@@ -28,36 +28,37 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	input     	[62:52]  	y;         		// Exponent of multiplicand y
 	input     	[62:52]  	z;           	// Exponent of addend z
 	input     	[62:52]	 	earlyres;  		// Result from other FPU block
-	input     				earlyressel;    // Select result from other block
+	input     			earlyressel;    // Select result from other block
 	input     	[1:1] 		bypsel;         // Bypass X or Z
-	input     				byppostnorm;    // Postnormalize bypassed result
-	input     				killprod;    	// Z >> product
-	input     				sumzero;     	// sum exactly equals zero 
-	input     				postnormalize;  // postnormalize rounded result
+	input     			byppostnorm;    // Postnormalize bypassed result
+	input     			killprod;    	// Z >> product
+	input     			sumzero;     	// sum exactly equals zero 
+	input     			postnormalize;  // postnormalize rounded result
 	input     	[8:0]  		normcnt;     	// normalization shift count 
-	input     				infinity;    	// generate infinity on overflow 
-	input     				invalid;     	// Result invalid
-	input     				overflow;    	// Result overflowed
-	input     				underflow;   	// Result underflowed 
-	input     				inf;			// Some input is infinity
-	input     				nan;			// Some input is NaN
-	input     				xnan;			// X is NaN
-	input     				ynan;			// Y is NaN
-	input     				znan;			// Z is NaN 
-	input     				zdenorm;		// Z is denorm
-	input     				specialsel;  	// Select special result
+	input     			infinity;    	// generate infinity on overflow 
+	input     			invalid;     	// Result invalid
+	input     			overflow;    	// Result overflowed
+	input     			underflow;   	// Result underflowed 
+	input     			inf;			// Some input is infinity
+	input     			nan;			// Some input is NaN
+	input     			xnan;			// X is NaN
+	input     			ynan;			// Y is NaN
+	input     			znan;			// Z is NaN 
+	input     			zdenorm;		// Z is denorm
+	input     			proddenorm;		// product is denorm
+	input     			specialsel;  	// Select special result
 	output		[11:0]   	aligncnt;       // shift count for alignment shifter
-	output		[62:52]     w;           	// Exponent of result
-	output		[62:52]     wbypass;     	// Prerounded exponent for bypass 
-	output					prodof;         // X*Y exponent out of bounds 
-	output					sumof;          // X*Y+Z exponent out of bounds 
-	output					sumuf;         // X*Y+Z exponent underflows 
-	output					denorm0;     	// exponent = 0 for denorm 
+	output		[62:52]    	w;           	// Exponent of result
+	output		[62:52]     	wbypass;     	// Prerounded exponent for bypass 
+	output				prodof;         // X*Y exponent out of bounds 
+	output				sumof;          // X*Y+Z exponent out of bounds 
+	output				sumuf;         // X*Y+Z exponent underflows 
+	output				denorm0;     	// exponent = 0 for denorm 
 	output		[12:0]		ae;				//exponent of multiply
 
 	//   Internal nodes
 
-	wire 	[12:0]			aetmp;				// Exponent of Multiply
+
 	wire 	[12:0]			aligncnt0;		// Shift count for alignment
 	wire 	[12:0]			aligncnt1;		// Shift count for alignment
 	wire 	[12:0]			be;				// Exponent of multiply
@@ -72,9 +73,11 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	// Note that the exponent does not have to be incremented on a postrounding
 	//   normalization of X because the mantissa was already increased.   Report
 	//   if exponent is out of bounds 
-	assign ae = x + y  - 1023; 
 
-	assign prodof = (ae > 2046 && ~ae[12] && ~killprod);
+
+	assign ae = x + y  - 1023;
+
+	assign prodof = (ae > 2046 && ~ae[12]);
 
 	// Compute alignment shift count
 	// Adjust for postrounding normalization of Z.
@@ -82,8 +85,10 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	// check if a round overflows is shorter than the actual round and
 	// is masked by the bypass mux and two 10 bit adder delays.
 
-	assign aligncnt0 = z - ae[10:0] + 13'b0;
-	assign aligncnt1 = z - ae[10:0] + 13'b1;
+	assign aligncnt0 = z - ae + 13'b0;// KEP use all of ae
+	assign aligncnt1 = z - ae + 13'b1;	
+	//assign aligncnt0 = z - ae[10:0] + 13'b0;//original
+	//assign aligncnt1 = z - ae[10:0] + 13'b1;
 	assign aligncnt = bypsel[1] && byppostnorm ? aligncnt1 : aligncnt0;
 
 	// Select exponent (usually from product except in case of huge addend)
@@ -118,13 +123,17 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	// rounding mode.  NaNs are propagated or generated.
 
 	assign specialres = earlyressel ? earlyres :
-					invalid ? nanres :
+					invalid | nan ? nanres : // KEP added nan
 					overflow ? infinityres : 
 					inf ? 11'b11111111111 :
 					underflow ? 11'b0 : 11'bx;
 
 	assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;
 
+	// IEEE 754-2008 section 6.2.3 states:
+	// "If two or more inputs are NaN, then the payload of the resulting NaN should be 
+	// identical to the payload of one of the input NaNs if representable in the destination
+	// format. This standard does not specify which of the input NaNs will provide the payload."
 	assign nanres = xnan ? x : (ynan ? y : (znan? z : 11'b11111111111));
 
 	// A mux selects the early result from other FPU blocks or the 
diff --git a/wally-pipelined/src/fpu/FMA/flag.v b/wally-pipelined/src/fpu/FMA/flag.v
index d7765c54..901af488 100644
--- a/wally-pipelined/src/fpu/FMA/flag.v
+++ b/wally-pipelined/src/fpu/FMA/flag.v
@@ -13,31 +13,31 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 			 inf, nan, invalid, overflow, underflow, inexact);
 /////////////////////////////////////////////////////////////////////////////
 
-	input                  	xnan;        	// X is NaN 
-	input                  	ynan;        	// Y is NaN 
-	input                 	znan;       	// Z is NaN 
-	input                  	xinf;        	// X is Inf
-	input                 	yinf;       	// Y is Inf 
-	input                  	zinf;        	// Z is Inf
-	input                  	prodof;         // X*Y overflows exponent
-	input                  	sumof;          // X*Y + z underflows exponent
-	input                  	sumuf;          // X*Y + z underflows exponent
-	input					psign; 			// Sign of product
-	input					zsign; 			// Sign of z
-	input					xzero;			// x = 0
-	input					yzero;			// y = 0
-	input     	[1:0]  		v;				// R and S bits of result
-	output					inf;			// Some	source is Inf
-	output					nan;			// Some	source is NaN
-	output					invalid;		// Result is invalid	
-	output					overflow;		// Result overflowed	
-	output					underflow;		// Result underflowed	
-	output					inexact;		// Result is not an exact	number
+	input                  		xnan;        	// X is NaN 
+	input                  		ynan;        	// Y is NaN 
+	input                 		znan;       	// Z is NaN 
+	input                  		xinf;        	// X is Inf
+	input                 		yinf;       	// Y is Inf 
+	input                  		zinf;        	// Z is Inf
+	input                  		prodof;         // X*Y overflows exponent
+	input                  		sumof;          // X*Y + z overflows exponent
+	input                  		sumuf;          // X*Y + z underflows exponent
+	input				psign; 		// Sign of product
+	input				zsign; 		// Sign of z
+	input				xzero;		// x = 0
+	input				yzero;		// y = 0
+	input     	[1:0]  		v;		// R and S bits of result
+	output				inf;		// Some	source is Inf
+	output				nan;		// Some	source is NaN
+	output				invalid;	// Result is invalid	
+	output				overflow;	// Result overflowed	
+	output				underflow;	// Result underflowed	
+	output				inexact;	// Result is not an exact number
  
 	//   Internal nodes
 
-	wire					prodinf;		// X*Y larger than max possible
-	wire					suminf;			// X*Y+Z larger than max possible
+	wire				prodinf;	// X*Y larger than max possible
+	wire				suminf;		// X*Y+Z larger than max possible
 
 	// If any input is NaN, propagate the NaN 
 
@@ -46,12 +46,14 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 	// Same with infinity (inf - inf and O * inf don't propagate inf
 	//  but it's ok becaue illegal op takes higher precidence)
 
-	assign inf= xinf || yinf || zinf;
+	assign inf= xinf || yinf || zinf || suminf;//KEP added suminf 
+	//assign inf= xinf || yinf || zinf;//original
 
 	// Generate infinity checks
 
 	assign prodinf = prodof && ~xnan && ~ynan;
-	assign suminf = sumof && ~xnan && ~ynan && ~znan;
+	//KEP added if the product is infinity then sum is infinity
+	assign suminf = prodinf | sumof && ~xnan && ~ynan && ~znan;
 
 	// Set invalid flag for following cases:
 	//   1) Inf - Inf
@@ -59,8 +61,7 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 	//   3) Output = NaN (this is not part of the IEEE spec,  only 486 proj)
 
 	assign invalid = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
-					   xzero && yinf || yzero && xinf ||
-					   nan;
+					   xzero && yinf || yzero && xinf;// KEP remove case 3) above
 
 	// Set the overflow flag for the following cases:
 	//   1) Rounded multiply result would be out of bounds
diff --git a/wally-pipelined/src/fpu/FMA/fmac.v b/wally-pipelined/src/fpu/FMA/fmac.v
index 30e1f039..724a8933 100644
--- a/wally-pipelined/src/fpu/FMA/fmac.v
+++ b/wally-pipelined/src/fpu/FMA/fmac.v
@@ -103,7 +103,7 @@ module fmac(xrf, y, zrf, rn, rz, rp, rm,
 						   earlyres[62:52], earlyressel, bypsel[1], byppostnorm,
 						   killprod, sumzero, postnorrnalize, normcnt, 
 						   infinity, invalid, overflow, underflow, 
-						   inf, nan, xnan, ynan, znan, zdenorm, specialsel,
+						   inf, nan, xnan, ynan, znan, zdenorm, proddenorm, specialsel,
 						   aligncnt, w[62:52], wbypass[62:52],
 						   prodof, sumof, sumuf, denorm0, ae);
 // Instantiate special case detection across datapath & exponent path 
@@ -120,7 +120,7 @@ assign wbypass[63] = w[63];
 // Instantiate control logic
  
 sign				sign(x[63], y[63], z[63], negsum0, negsum1, bs, ps, 
-					     killprod, rm, sumzero, nan, invalid, xinf, yinf, inf, 
+					     killprod, rm, overflow, sumzero, nan, invalid, xinf, yinf, zinf, inf, 
 						 w[63], invz, negsum, selsum1, psign); 
 flag				flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 						 psign, z[63], xzero, yzero, v[1:0],
diff --git a/wally-pipelined/src/fpu/FMA/normalize.v b/wally-pipelined/src/fpu/FMA/normalize.v
index c92736d8..3ad8752d 100644
--- a/wally-pipelined/src/fpu/FMA/normalize.v
+++ b/wally-pipelined/src/fpu/FMA/normalize.v
@@ -18,12 +18,12 @@ module normalize(sum[157:0], normcnt, sumzero, bs, ps, denorm0, zdenorm, v[53:0]
 /////////////////////////////////////////////////////////////////////////////
 	input     	[157:0]  	sum;            // sum
 	input		[8:0] 		normcnt;     	// normalization shift count
-	input					sumzero;		// sum is zero
-	input					bs;				// sticky bit for addend
-	input					ps;				// sticky bit for product
-	input					denorm0;		// exponent = -1023
-	input                  	zdenorm;        // Input Z is denormalized
-	output		[53:0]		v;				// normalized sum, R, S bits
+	input				sumzero;	// sum is zero
+	input				bs;		// sticky bit for addend
+	input				ps;		// sticky bit for product
+	input				denorm0;	// exponent = -1023
+	input                  		zdenorm;        // Input Z is denormalized
+	output		[53:0]		v;		// normalized sum, R, S bits
 
 	// Internal nodes
 
diff --git a/wally-pipelined/src/fpu/FMA/round.v b/wally-pipelined/src/fpu/FMA/round.v
index 217f33ad..a4e4e1cd 100644
--- a/wally-pipelined/src/fpu/FMA/round.v
+++ b/wally-pipelined/src/fpu/FMA/round.v
@@ -19,37 +19,37 @@ module round(v[53:0], earlyres[51:0], earlyressel, rz, rn, rp, rm, wsign,
 			  w[51:0], postnormalize, infinity, specialsel);
 /////////////////////////////////////////////////////////////////////////////
 
-	input		[53:0]		v;				// normalized sum, R, S bits
-	input		[51:0]		earlyres;		// result from other FPU blocks
-	input 					earlyressel; 	// use result from other FPU blocks
-	input					rz;				// Round toward zero
-	input					rn;				// Round toward	nearest
-	input					rp;				// Round toward	plus infinity
-	input					rm;				// Round toward	minus infinity
-	input					wsign;			// Sign of result
-	input 					invalid;		// Trap on infinity, NaN, denorm
-	input					overflow;		// Result overflowed
-	input					underflow;		// Result underflowed
-	input					inf;			// Some input is infinity
-	input					nan;			// Some input is NaN
-	input					xnan;			// X is NaN
-	input					ynan;			// Y is NaN
-	input					znan;			// Z is NaN
-	input		[51:0]		x;				// Input X
-	input		[51:0]		y;				// Input Y
-	input		[51:0]		z;				// Input Z
-	output		[51:0]		w; 				// rounded result of FMAC
-	output					postnormalize; 	// Right shift 1 for post-rounding norm
-	output					infinity;    	// Generate infinity on overflow
-	output					specialsel;  	// Select special result
+	input		[53:0]		v;		// normalized sum, R, S bits
+	input		[51:0]		earlyres;	// result from other FPU blocks
+	input 				earlyressel; 	// use result from other FPU blocks
+	input				rz;		// Round toward zero
+	input				rn;		// Round toward	nearest
+	input				rp;		// Round toward	plus infinity
+	input				rm;		// Round toward	minus infinity
+	input				wsign;		// Sign of result
+	input 				invalid;	// Trap on infinity, NaN, denorm
+	input				overflow;	// Result overflowed
+	input				underflow;	// Result underflowed
+	input				inf;		// Some input is infinity
+	input				nan;		// Some input is NaN
+	input				xnan;		// X is NaN
+	input				ynan;		// Y is NaN
+	input				znan;		// Z is NaN
+	input		[51:0]		x;		// Input X
+	input		[51:0]		y;		// Input Y
+	input		[51:0]		z;		// Input Z
+	output		[51:0]		w; 		// rounded result of FMAC
+	output				postnormalize; 	// Right shift 1 for post-rounding norm
+	output				infinity;    	// Generate infinity on overflow
+	output				specialsel;  	// Select special result
 
 	// Internal nodes
 
-	wire					plus1;			// Round by adding one 
-	wire		[52:0]		v1;				// Result + 1 (for rounding)
-	wire		[51:0]		specialres;		// Result of exceptional case 
+	wire				plus1;		// Round by adding one 
+	wire		[52:0]		v1;		// Result + 1 (for rounding)
+	wire		[51:0]		specialres;	// Result of exceptional case 
 	wire		[51:0]		infinityres;	// Infinity or largest real number
-	wire		[51:0]		nanres;			// Propagated or generated NaN 
+	wire		[51:0]		nanres;		// Propagated or generated NaN 
 
 	// Compute if round should occur.  This equation is derived from
 	// the rounding tables.
@@ -77,7 +77,7 @@ module round(v[53:0], earlyres[51:0], earlyressel, rz, rn, rp, rm, wsign,
 	assign specialsel = earlyressel || overflow || underflow || invalid ||
 							nan || inf;
 	assign specialres = earlyressel ? earlyres : 
-						 invalid ? nanres : 
+						 invalid | nan ? nanres : //KEP added nan
 						 overflow ? infinityres : 
 						 inf ? 52'b0 :
 						underflow ? 52'b0 : 52'bx;  // default to undefined 
@@ -93,6 +93,11 @@ module round(v[53:0], earlyres[51:0], earlyressel, rz, rn, rp, rm, wsign,
 	// NaN inputs are already quiet, we don't have to force them quiet.
 
 	// assign nanres = xnan ? x: (ynan ? y : (znan ? z : {1'b1, 51'b0})); // original
+
+	// IEEE 754-2008 section 6.2.3 states:
+	// "If two or more inputs are NaN, then the payload of the resulting NaN should be 
+	// identical to the payload of one of the input NaNs if representable in the destination
+	// format. This standard does not specify which of the input NaNs will provide the payload."
 	assign nanres = xnan ? {1'b1, x[50:0]}: (ynan ? {1'b1, y[50:0]} : (znan ? {1'b1, z[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
 
 	// Select result with 4:1 mux
diff --git a/wally-pipelined/src/fpu/FMA/sign.v b/wally-pipelined/src/fpu/FMA/sign.v
index 9503847f..48fd716f 100644
--- a/wally-pipelined/src/fpu/FMA/sign.v
+++ b/wally-pipelined/src/fpu/FMA/sign.v
@@ -10,8 +10,8 @@
 /////////////////////////////////////////////////////////////////////////////
 
 /////////////////////////////////////////////////////////////////////////////
-module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
-			 sumzero, nan, invalid, xinf, yinf, inf, wsign, invz, negsum, selsum1, psign);
+module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm, overflow,
+			 sumzero, nan, invalid, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign);
 ////////////////////////////////////////////////////////////////////////////I
  
 	input					xsign;			// Sign of X 
@@ -23,11 +23,13 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
 	input					ps;				// sticky bit from product
 	input					killprod;		// Product forced to zero
 	input					rm;				// Round toward minus infinity
+	input					overflow;				// Result overflowed
 	input					sumzero;		// Sum = O
 	input					nan;			// Some input is NaN
 	input					invalid;		// Result invalid
 	input					xinf;			// X = Inf
 	input					yinf;			// Y = Inf
+	input					zinf;			// Z = Inf
 	input					inf;			// Some input = Inf
 	output					wsign;			// Sign of W 
 	output					invz;			// Invert addend into adder
@@ -47,13 +49,13 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
 	assign psign = xsign ^ ysign;
 
 	// Invert addend if sign of Z is different from sign of product assign invz = zsign ^ psign;
-	assign invz = zsign ^ psign;
+	assign invz = (zsign ^ psign);
 	// Select +l mode for adder and compute if result must be negated
 	// This is done according to cases based on the sticky bit.
 
 	always @(invz or negsum0 or negsum1 or bs or ps)
 		begin
-			if (~invz) begin               // both inputs have same sign
+			if (~invz) begin               // both inputs have same sign //KEP if overflow 
 				negsum = 0;
 				selsum1 = 0;
 			end else if (bs) begin        // sticky bit set on addend
@@ -85,9 +87,8 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
 	//			 sum/difference shall be -0.  However, x+x = x-(-X) retains the same sign as x even when x is zero."
  
 	assign zerosign = (~invz && killprod) ? zsign : rm;
-	assign infsign = psign; //KEP 210112 keep the correct sign when result is infinity
-	// assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
-	assign wsign =invalid? 0 : (inf ? infsign:
-								(sumzero ? zerosign : psign ^ negsum));
+	assign infsign = zinf ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
+	//assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
+	assign wsign = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
 
 endmodule
diff --git a/wally-pipelined/src/fpu/FMA/special.v b/wally-pipelined/src/fpu/FMA/special.v
index ec588ca9..f2201f5c 100644
--- a/wally-pipelined/src/fpu/FMA/special.v
+++ b/wally-pipelined/src/fpu/FMA/special.v
@@ -14,23 +14,23 @@ module special(x[63:0], y[63:0], z[63:0], ae, xzero, yzero, zzero,
 				xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, xinf, yinf, zinf);
 /////////////////////////////////////////////////////////////////////////////
 
-	input   		[63:0]     	x;             // Input x
+	input   	[63:0]     	x;              // Input x
 	input     	[63:0]     	y;           	// Input Y
 	input      	[63:0]    	z;            	// Input z 
-	input		[12:0]			ae;			// exponent of product
-	output						xzero;			// Input x = 0
-	output						yzero;			// Input y = 0
-	output						zzero;			// Input z = 0
-	output						xnan;			// x is NaN
-	output						ynan;			// y is NaN
-	output						znan;			// z is NaN
-	output						xdenorm;		// x is denormalized
-	output						ydenorm;		// y is denormalized
-	output						zdenorm;		// z is denormalized
-	output						proddenorm;		// product is denormalized
-	output						xinf;			// x is infinity
-	output						yinf;			// y is infinity
-	output						zinf;			// z is infinity
+	input		[12:0]		ae;		// exponent of product
+	output				xzero;		// Input x = 0
+	output				yzero;		// Input y = 0
+	output				zzero;		// Input z = 0
+	output				xnan;		// x is NaN
+	output				ynan;		// y is NaN
+	output				znan;		// z is NaN
+	output				xdenorm;	// x is denormalized
+	output				ydenorm;	// y is denormalized
+	output				zdenorm;	// z is denormalized
+	output				proddenorm;	// product is denormalized
+	output				xinf;		// x is infinity
+	output				yinf;		// y is infinity
+	output				zinf;		// z is infinity
 
 	// In the actual circuit design, the gates looking at bits
 	// 51:0 and at bits 62:52 should be shared among the various detectors.