fixed various bugs

2025-02-11 06:05:49 +00:00 · 2021-03-04 22:18:19 +00:00 · 2021-03-04 22:18:19 +00:00 · fdfc0dbf46
commit fdfc0dbf46
parent 57e484cd55
9 changed files with 149 additions and 133 deletions
--- a/wally-pipelined/src/fpu/FMA/add.v
+++ b/wally-pipelined/src/fpu/FMA/add.v
@ -35,14 +35,14 @@ module add(r[105:0], s[105:0], t[157:0], sum[157:0],
 	wire		[157:0] 	sum0;			// sum of compound adder +0 mode
 	wire		[157:0] 	sum1;			// sum of compound adder +1 mode
-	// Invert addend if necessary 
+	// Invert addend if z's sign is different from the product's sign
 	assign t2 = invz ? -t : t;
 	// Zero out product if Z >> product or product really should be zero
-	assign r2 = ~proddenorm & killprod ? 106'b0 : r;
+	assign r2 = killprod ? 106'b0 : r;
-	assign s2 = ~proddenorm & killprod ? 106'b0 : s;
+	assign s2 = killprod ? 106'b0 : s;
 	// Compound adder
 	// Consists of 3:2 CSA followed by long compound CPA
--- a/wally-pipelined/src/fpu/FMA/align.v
+++ b/wally-pipelined/src/fpu/FMA/align.v
@ -21,8 +21,8 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 	input				xzero;		// Input X = 0
 	input                  		yzero;          // Input Y = 0 
 	input                  		zzero;          // Input Z = 0
-	input                  	zdenorm;        // Input Z = denorm
+	input                  		zdenorm;        // Input Z is denormalized
-	input			proddenorm;
+	input				proddenorm;	// product is denormalized
 	input     	[1:1] 		bypsel;         // Select bypass to X or Z
 	input				bypplus1;	// Add one to bypassed result
 	input                  		byppostnorm;    // Postnormalize bypassed result 
@ -56,7 +56,7 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 	// addend on right shifts.  Handle special cases of shifting
 	// by too much.
-	always @(z2 or aligncnt or align104 or zzero or xzero or yzero or zdenorm)
+	always @(z2 or aligncnt or align104 or zzero or xzero or yzero or zdenorm or proddenorm)
 		begin
 		// Default to clearing sticky bits 
@ -66,7 +66,7 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 		// And to using product as primary operand in adder / exponent gen 
 		killprod = 0;
-		if(zzero) begin 
+		if(zzero) begin // if z = 0
 			t = 158'b0;
 			if (xzero || yzero) killprod = 1;
 		end else if ((aligncnt > 53 && ~aligncnt[11]) || xzero || yzero) begin
@ -75,7 +75,7 @@ module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddeno
 			t = {53'b0, ~zzero, z2, 52'b0}; 
 			killprod = 1;
 			ps = ~xzero && ~yzero; 
-		end else if ((ae[12] && align104[11])) begin //***fix the if statement
+		end else if ((ae[12] && align104[11]) && ~proddenorm) begin //***fix the if statement
 							// KEP if the multiplier's exponent overflows
 			t = {53'b0, ~zzero, z2, 52'b0}; 
 			killprod = 1;
--- a/wally-pipelined/src/fpu/FMA/expgen.v
+++ b/wally-pipelined/src/fpu/FMA/expgen.v
@ -19,7 +19,7 @@ module expgen(x[62:52], y[62:52], z[62:52],
 			   earlyres[62:52], earlyressel, bypsel[1], byppostnorm, 
 			   killprod,  sumzero, postnormalize, normcnt, infinity, 
 			   invalid, overflow, underflow, inf, 
-			   nan, xnan, ynan, znan, zdenorm, specialsel, 
+			   nan, xnan, ynan, znan, zdenorm, proddenorm, specialsel, 
 			   aligncnt, w[62:52], wbypass[62:52],
 			   prodof, sumof, sumuf, denorm0, ae[12:0]);
 /////////////////////////////////////////////////////////////////////////////
@ -45,6 +45,7 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	input     			ynan;			// Y is NaN
 	input     			znan;			// Z is NaN 
 	input     			zdenorm;		// Z is denorm
 	input     			proddenorm;		// product is denorm
 	input     			specialsel;  	// Select special result
 	output		[11:0]   	aligncnt;       // shift count for alignment shifter
 	output		[62:52]    	w;           	// Exponent of result
@ -57,7 +58,7 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	//   Internal nodes
-	wire 	[12:0]			aetmp;				// Exponent of Multiply
+
 	wire 	[12:0]			aligncnt0;		// Shift count for alignment
 	wire 	[12:0]			aligncnt1;		// Shift count for alignment
 	wire 	[12:0]			be;				// Exponent of multiply
@ -72,9 +73,11 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	// Note that the exponent does not have to be incremented on a postrounding
 	//   normalization of X because the mantissa was already increased.   Report
 	//   if exponent is out of bounds 
 	assign ae = x + y  - 1023;
-	assign prodof = (ae > 2046 && ~ae[12] && ~killprod);
+	assign prodof = (ae > 2046 && ~ae[12]);
 	// Compute alignment shift count
 	// Adjust for postrounding normalization of Z.
@ -82,8 +85,10 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	// check if a round overflows is shorter than the actual round and
 	// is masked by the bypass mux and two 10 bit adder delays.
-	assign aligncnt0 = z - ae[10:0] + 13'b0;
+	assign aligncnt0 = z - ae + 13'b0;// KEP use all of ae
-	assign aligncnt1 = z - ae[10:0] + 13'b1;
+	assign aligncnt1 = z - ae + 13'b1;	
 	//assign aligncnt0 = z - ae[10:0] + 13'b0;//original
 	//assign aligncnt1 = z - ae[10:0] + 13'b1;
 	assign aligncnt = bypsel[1] && byppostnorm ? aligncnt1 : aligncnt0;
 	// Select exponent (usually from product except in case of huge addend)
@ -118,13 +123,17 @@ module expgen(x[62:52], y[62:52], z[62:52],
 	// rounding mode.  NaNs are propagated or generated.
 	assign specialres = earlyressel ? earlyres :
-					invalid ? nanres :
+					invalid | nan ? nanres : // KEP added nan
 					overflow ? infinityres : 
 					inf ? 11'b11111111111 :
 					underflow ? 11'b0 : 11'bx;
 	assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;
 	// IEEE 754-2008 section 6.2.3 states:
 	// "If two or more inputs are NaN, then the payload of the resulting NaN should be 
 	// identical to the payload of one of the input NaNs if representable in the destination
 	// format. This standard does not specify which of the input NaNs will provide the payload."
 	assign nanres = xnan ? x : (ynan ? y : (znan? z : 11'b11111111111));
 	// A mux selects the early result from other FPU blocks or the 
--- a/wally-pipelined/src/fpu/FMA/flag.v
+++ b/wally-pipelined/src/fpu/FMA/flag.v
@ -46,12 +46,14 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 	// Same with infinity (inf - inf and 0 * inf don't propagate inf,
 	//  but it's ok because illegal op takes higher precedence)
-	assign inf= xinf || yinf || zinf;
+	assign inf= xinf || yinf || zinf || suminf;//KEP added suminf 
 	//assign inf= xinf || yinf || zinf;//original
 	// Generate infinity checks
 	assign prodinf = prodof && ~xnan && ~ynan;
-	assign suminf = sumof && ~xnan && ~ynan && ~znan;
+	//KEP added if the product is infinity then sum is infinity
 	assign suminf = prodinf | sumof && ~xnan && ~ynan && ~znan;
 	// Set invalid flag for following cases:
 	//   1) Inf - Inf
@ -59,8 +61,7 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 	//   3) Output = NaN (this is not part of the IEEE spec,  only 486 proj)
 	assign invalid = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
-					   xzero && yinf || yzero && xinf ||
+					   xzero && yinf || yzero && xinf;// KEP remove case 3) above
 					   nan;
 	// Set the overflow flag for the following cases:
 	//   1) Rounded multiply result would be out of bounds
--- a/wally-pipelined/src/fpu/FMA/fmac.v
+++ b/wally-pipelined/src/fpu/FMA/fmac.v
@ -103,7 +103,7 @@ module fmac(xrf, y, zrf, rn, rz, rp, rm,
 						   earlyres[62:52], earlyressel, bypsel[1], byppostnorm,
 						   killprod, sumzero, postnorrnalize, normcnt, 
 						   infinity, invalid, overflow, underflow, 
-						   inf, nan, xnan, ynan, znan, zdenorm, specialsel,
+						   inf, nan, xnan, ynan, znan, zdenorm, proddenorm, specialsel,
 						   aligncnt, w[62:52], wbypass[62:52],
 						   prodof, sumof, sumuf, denorm0, ae);
 // Instantiate special case detection across datapath & exponent path 
@ -120,7 +120,7 @@ assign wbypass[63] = w[63];
 // Instantiate control logic
 sign				sign(x[63], y[63], z[63], negsum0, negsum1, bs, ps, 
-					     killprod, rm, sumzero, nan, invalid, xinf, yinf, inf, 
+					     killprod, rm, overflow, sumzero, nan, invalid, xinf, yinf, zinf, inf, 
 						 w[63], invz, negsum, selsum1, psign); 
 flag				flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 						 psign, z[63], xzero, yzero, v[1:0],
--- a/wally-pipelined/src/fpu/FMA/round.v
+++ b/wally-pipelined/src/fpu/FMA/round.v
@ -77,7 +77,7 @@ module round(v[53:0], earlyres[51:0], earlyressel, rz, rn, rp, rm, wsign,
 	assign specialsel = earlyressel || overflow || underflow || invalid ||
 							nan || inf;
 	assign specialres = earlyressel ? earlyres : 
-						 invalid ? nanres : 
+						 invalid | nan ? nanres : //KEP added nan
 						 overflow ? infinityres : 
 						 inf ? 52'b0 :
 						underflow ? 52'b0 : 52'bx;  // default to undefined 
@ -93,6 +93,11 @@ module round(v[53:0], earlyres[51:0], earlyressel, rz, rn, rp, rm, wsign,
 	// NaN inputs are already quiet, we don't have to force them quiet.
 	// assign nanres = xnan ? x: (ynan ? y : (znan ? z : {1'b1, 51'b0})); // original
 	// IEEE 754-2008 section 6.2.3 states:
 	// "If two or more inputs are NaN, then the payload of the resulting NaN should be 
 	// identical to the payload of one of the input NaNs if representable in the destination
 	// format. This standard does not specify which of the input NaNs will provide the payload."
 	assign nanres = xnan ? {1'b1, x[50:0]}: (ynan ? {1'b1, y[50:0]} : (znan ? {1'b1, z[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
 	// Select result with 4:1 mux
--- a/wally-pipelined/src/fpu/FMA/sign.v
+++ b/wally-pipelined/src/fpu/FMA/sign.v
@ -10,8 +10,8 @@
 /////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////
-module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
+module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm, overflow,
-			 sumzero, nan, invalid, xinf, yinf, inf, wsign, invz, negsum, selsum1, psign);
+			 sumzero, nan, invalid, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign);
 ////////////////////////////////////////////////////////////////////////////I
 	input					xsign;			// Sign of X 
@ -23,11 +23,13 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
 	input					ps;				// sticky bit from product
 	input					killprod;		// Product forced to zero
 	input					rm;				// Round toward minus infinity
 	input					overflow;				// Overflow flag
 	input					sumzero;		// Sum = 0
 	input					nan;			// Some input is NaN
 	input					invalid;		// Result invalid
 	input					xinf;			// X = Inf
 	input					yinf;			// Y = Inf
 	input					zinf;			// Z = Inf
 	input					inf;			// Some input = Inf
 	output					wsign;			// Sign of W 
 	output					invz;			// Invert addend into adder
@ -47,13 +49,13 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
 	assign psign = xsign ^ ysign;
 	// Invert addend if sign of Z is different from sign of product
-	assign invz = zsign ^ psign;
+	assign invz = (zsign ^ psign);
 	// Select +1 mode for adder and compute if result must be negated
 	// This is done according to cases based on the sticky bit.
 	always @(invz or negsum0 or negsum1 or bs or ps)
 		begin
-			if (~invz) begin               // both inputs have same sign
+			if (~invz) begin               // both inputs have same sign //KEP if overflow 
 				negsum = 0;
 				selsum1 = 0;
 			end else if (bs) begin        // sticky bit set on addend
@ -85,9 +87,8 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
 	//			 sum/difference shall be -0.  However, x+x = x-(-X) retains the same sign as x even when x is zero."
 	assign zerosign = (~invz && killprod) ? zsign : rm;
-	assign infsign = psign; //KEP 210112 keep the correct sign when result is infinity
+	assign infsign = zinf ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
-	// assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
+	//assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
-	assign wsign =invalid? 0 : (inf ? infsign:
+	assign wsign = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
 								(sumzero ? zerosign : psign ^ negsum));
 endmodule