fixed synth bugs in fpu

2021-04-19 00:39:16 +00:00 · 2021-04-19 00:39:16 +00:00 · d12eb0f4eb
commit d12eb0f4eb
parent 2af4e2f4ac
18 changed files with 265 additions and 285 deletions
--- a/wally-pipelined/src/fpu/add.sv
+++ b/wally-pipelined/src/fpu/add.sv
@ -15,16 +15,16 @@ module add(rM, sM, tM, sum,
 		   negsum, invz, selsum1, negsum0, negsum1, killprodM);
 ////////////////////////////////////////////////////////////////////////////////

-	input 		[105:0]		rM;     			// partial product 1
-	input 		[105:0]		sM;              // partial product 2
-	input 		[163:0]		tM;             	// aligned addend 
-	input					invz;       	// invert addend
-	input 					selsum1;    	// select +1 mode of compound adder 
-	input					killprodM;    	// z >> product
-	input					negsum;      	// Negate sum 
-	output		[163:0]		sum;         	// sum
-	output					negsum0;     	// sum was negative in +0 mode
-	output					negsum1;     	// sum was negative in +1 mode 
+	input logic 		[105:0]		rM;     			// partial product 1
+	input logic 		[105:0]		sM;              // partial product 2
+	input logic 		[163:0]		tM;             	// aligned addend 
+	input logic					invz;       	// invert addend
+	input logic 					selsum1;    	// select +1 mode of compound adder 
+	input logic					killprodM;    	// z >> product
+	input logic					negsum;      	// Negate sum 
+	output logic		[163:0]		sum;         	// sum
+	output logic					negsum0;     	// sum was negative in +0 mode
+	output logic					negsum1;     	// sum was negative in +1 mode 

 	// Internal nodes

@ -44,11 +44,12 @@ module add(rM, sM, tM, sum,
 	assign r2 = killprodM ? 106'b0 : rM;
 	assign s2 = killprodM ? 106'b0 : sM;

+	//replace this with a more structural cpa that synthesizes better
 	// Compound adder
 	// Consists of 3:2 CSA followed by long compound CPA
-	assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
-	assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
-	assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above
+	// assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
+	assign sum0 = {1'b0,prodshifted} + t2 + 158'b0 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0};
+	assign sum1 = {1'b0,prodshifted} + t2 + 158'b1 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0}; // +1 from invert of z above
 	
 	// Check sign bits in +0/1 modes 
 	assign negsum0 = sum0[164];
--- a/wally-pipelined/src/fpu/align.sv
+++ b/wally-pipelined/src/fpu/align.sv
@ -15,33 +15,26 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
             killprodE,  sumshiftE, sumshiftzeroE);
 /////////////////////////////////////////////////////////////////////////////

-	input 		[51:0]		zman;		// Fraction of addend z;
-	input 		[12:0]		aligncntE;	// amount to shift
-	input				xzeroE;		// Input X = 0
-	input                  		yzeroE;          // Input Y = 0 
-	input                  		zzeroE;          // Input Z = 0
-	input                  		zdenormE;        // Input Z is denormalized
-	output    	[163:0]    	tE;              // aligned addend (54 bits left of bpt)
-	output          		bsE;           	// sticky bit of addend
-	output          		killprodE;    	// Z >> product
-	output		[7:0]		sumshiftE;	
-	output				sumshiftzeroE;
+	input logic 		[51:0]		zman;		// Fraction of addend z;
+	input logic 		[12:0]		aligncntE;	// amount to shift
+	input logic				xzeroE;		// Input X = 0
+	input logic                  		yzeroE;          // Input Y = 0 
+	input logic                  		zzeroE;          // Input Z = 0
+	input logic                  		zdenormE;        // Input Z is denormalized
+	output logic    	[163:0]    	tE;              // aligned addend (54 bits left of bpt)
+	output logic          		bsE;           	// sticky bit of addend
+	output logic          		killprodE;    	// Z >> product
+	output logic		[8:0]		sumshiftE;	
+	output logic				sumshiftzeroE;

 	// Internal nodes
 
-	reg       	[163:0]   	tE;				// aligned addend from shifter
 	reg       	[215:0]   	shift;				// aligned addend from shifter
-	reg             		killprodE;			// Z >> product 
-	reg             		bsE;				// sticky bit of addend
-	reg             		ps;				// sticky bit of product
-	reg             		zexpsel;				// sticky bit of product
+	logic         		zexpsel;				// sticky bit of product
 	reg       	[7:0]		i;				// temp storage for finding sticky bit
 	wire		[52:0]		z1;				// Z plus 1
 	wire		[51:0]		z2;				// Z selected after handling rounds
-	wire		[11:0]		align104;			// alignment count + 104
-	logic		[8:0]		sumshiftE;
-	logic sumshiftzeroE;
-
+	


 	// Compute sign of aligncntE + 104 to check for shifting too far right 
@ -51,18 +44,18 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
 	// Shift addend by alignment count.  Generate sticky bits from
 	// addend on right shifts.  Handle special cases of shifting
 	// by too much.
-
-	always @(aligncntE or xzeroE or yzeroE or zman or zdenormE or zzeroE)
+//***change always @ to always_comb
+	always_comb 
 		begin

 		// Default to clearing sticky bits 
 		bsE = 0;
-		ps = 0;

 		// And to using product as primary operand in adder I exponent gen 
 		killprodE = xzeroE | yzeroE;
 		// d = aligncntE
 		// p = 53
+		//***try reducing this hardware; try getting one shifter
 		if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1
 			//product ancored case with saturated shift
 			sumshiftE = 163;	// 3p+4	
--- a/wally-pipelined/src/fpu/booth.sv
+++ b/wally-pipelined/src/fpu/booth.sv
@ -1,21 +1,19 @@
 module booth(xExt, choose, add1, e, pp); 
 /////////////////////////////////////////////////////////////////////////////
    
-	input 		[53:0]		xExt;				// multiplicand	xExt
-	input		[2:0]		choose;				// bits needed to choose which encoding
-	output		[1:0]       	add1;				// do you add 1	
-    output                  e;
-	output		[54:0]		pp;				//	the resultant encoding
+	input logic 		[53:0]		xExt;				// multiplicand	xExt
+	input logic		[2:0]		choose;				// bits needed to choose which encoding
+	output logic		[1:0]       	add1;				// do you add 1	
+    output logic                  e;
+	output logic		[54:0]		pp;				//	the resultant encoding
    
-    logic [54:0] pp, temp;
-    logic e;
-    logic [1:0] add1;
+    logic [54:0] temp;
    logic [53:0] negx;
    //logic temp;

    assign negx = ~xExt;

-    always @(choose, xExt, negx)
+    always_comb
    case (choose)
        3'b000 : pp = 55'b0;   //  0
        3'b001 : pp = {1'b0, xExt};  //  1
@ -27,7 +25,7 @@ module booth(xExt, choose, add1, e, pp);
        3'b111 : pp = 55'hfffffffffffffff;  //  -0
    endcase

-    always @(choose, xExt, negx)
+    always_comb
    case (choose)
        3'b000 : e = 0;   //  0
        3'b001 : e = 0;  //  1
@ -40,7 +38,7 @@ module booth(xExt, choose, add1, e, pp);
    endcase
    // assign add1 = (choose[2] == 1'b1) ? ((choose[1:0] == 2'b11) ? 1'b0 : 1'b1) : 1'b0;
    // assign add1 = choose[2];
-    always @(choose)
+    always_comb
    case (choose)
        3'b000 : add1 = 2'b0;   //  0
        3'b001 : add1 = 2'b0;  //  1
--- a/wally-pipelined/src/fpu/compressors.sv
+++ b/wally-pipelined/src/fpu/compressors.sv
@ -3,11 +3,11 @@ module add3comp2(a, b, c, carry, sum);
 //look into diffrent implementations of the compressors?
    
    parameter BITS = 4;
-	input 		[BITS-1:0]		a;
-	input		[BITS-1:0]		b;
-	input		[BITS-1:0]    	c;
-    output      [BITS-1:0]      carry;
-	output		[BITS-1:0]		sum;
+	input logic 		[BITS-1:0]		a;
+	input logic		[BITS-1:0]		b;
+	input logic		[BITS-1:0]    	c;
+    output logic      [BITS-1:0]      carry;
+	output logic		[BITS-1:0]		sum;
    genvar i;

    generate
@ -22,12 +22,12 @@ module add4comp2(a, b, c, d, carry, sum);
 /////////////////////////////////////////////////////////////////////////////
    
    parameter BITS = 4;
-	input 		[BITS-1:0]		a;
-	input		[BITS-1:0]		b;
-	input		[BITS-1:0]    	c;
-	input		[BITS-1:0]    	d;
-    output      [BITS:0]      carry;
-	output		[BITS-1:0]		sum;
+	input logic 		[BITS-1:0]		a;
+	input logic		[BITS-1:0]		b;
+	input logic		[BITS-1:0]    	c;
+	input logic		[BITS-1:0]    	d;
+    output logic      [BITS:0]      carry;
+	output logic		[BITS-1:0]		sum;

    logic       [BITS-1:0]      cout;
    logic                       carryTmp;
@ -54,11 +54,11 @@ module sng3comp2(a, b, c, carry, sum);
 /////////////////////////////////////////////////////////////////////////////
 //look into diffrent implementations of the compressors?
    
-	input 				a;
-	input				b;
-	input		       	c;
-    output              carry;
-	output				sum;
+	input logic 				a;
+	input logic				b;
+	input logic		       	c;
+    output logic              carry;
+	output logic				sum;
    
    logic               axorb;

@ -73,14 +73,14 @@ module sng4comp2(a, b, c, d, cin, cout, carry, sum);
 /////////////////////////////////////////////////////////////////////////////
 //look into pass gate 4:2 counters?
    
-	input 				a;
-	input				b;
-	input		       	c;
-    input               d;
-    input               cin;
-    output              cout;
-    output              carry;
-	output				sum;
+	input logic 				a;
+	input logic				b;
+	input logic	       	c;
+    input logic               d;
+    input logic               cin;
+    output logic              cout;
+    output logic              carry;
+	output logic				sum;
    
    logic               TmpSum;

--- a/wally-pipelined/src/fpu/expgen1.sv
+++ b/wally-pipelined/src/fpu/expgen1.sv
@ -20,17 +20,17 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 			   aligncntE, prodof, aeE);
 /////////////////////////////////////////////////////////////////////////////
  
-	input     	[62:52]    	xexp;           	// Exponent of multiplicand x
-	input     	[62:52]  	yexp;         		// Exponent of multiplicand y
-	input     	[62:52]  	zexp;           	// Exponent of addend z
-	input     			xdenormE;		// Z is denorm
-	input     			ydenormE;		// Z is denorm
-	input     			zdenormE;		// Z is denorm
-	input     			xzeroE;		// Z is denorm
-	input     			yzeroE;		// Z is denorm
-	output		[12:0]   	aligncntE;       // shift count for alignment shifter
-	output				prodof;         // X*Y exponent out of bounds 
-	output		[12:0]		aeE;				//exponent of multiply
+	input logic     	[62:52]    	xexp;           	// Exponent of multiplicand x
+	input logic     	[62:52]  	yexp;         		// Exponent of multiplicand y
+	input logic     	[62:52]  	zexp;           	// Exponent of addend z
+	input logic     			xdenormE;		// X is denorm
+	input logic     			ydenormE;		// Y is denorm
+	input logic     			zdenormE;		// Z is denorm
+	input logic     			xzeroE;		// X is zero
+	input logic     			yzeroE;		// Y is zero
+	output logic		[12:0]   	aligncntE;       // shift count for alignment shifter
+	output logic			prodof;         // X*Y exponent out of bounds 
+	output logic		[12:0]		aeE;				//exponent of multiply

 	//   Internal nodes

--- a/wally-pipelined/src/fpu/expgen2.sv
+++ b/wally-pipelined/src/fpu/expgen2.sv
@ -23,24 +23,24 @@ module expgen2(xexp, yexp, zexp,
 			   sumof, sumuf);
 /////////////////////////////////////////////////////////////////////////////
  
-	input     	[62:52]    	xexp;           	// Exponent of multiplicand x
-	input     	[62:52]  	yexp;         		// Exponent of multiplicand y
-	input     	[62:52]  	zexp;           	// Exponent of addend z
-	input     			sumzero;     	// sum exactly equals zero 
-	input     			resultdenorm;  // postnormalize rounded result
-	input     			infinity;    	// generate infinity on overflow 
-	input     	[4:0]	FmaFlagsM;     	// Result invalid
-	input     			inf;			// Some input is infinity
-	input     			nanM;			// Some input is NaN
-	input     	[12:0]		de0;			// X is NaN NaN
-	input     			xnanM;			// X is NaN
-	input     			ynanM;			// Y is NaN
-	input     			znanM;			// Z is NaN 
-	input				expplus1;
-	input     			specialsel;  	// Select special result
-	output		[62:52]    	wexp;           	// Exponent of result
-	output				sumof;          // X*Y+Z exponent out of bounds 
-	output				sumuf;         // X*Y+Z exponent underflows 
+	input logic     	[62:52]    	xexp;           	// Exponent of multiplicand x
+	input logic     	[62:52]  	yexp;         		// Exponent of multiplicand y
+	input logic     	[62:52]  	zexp;           	// Exponent of addend z
+	input logic     			sumzero;     	// sum exactly equals zero 
+	input logic     			resultdenorm;  // postnormalize rounded result
+	input logic     			infinity;    	// generate infinity on overflow 
+	input logic     	[4:0]	FmaFlagsM;     	// Result invalid
+	input logic     			inf;			// Some input is infinity
+	input logic     			nanM;			// Some input is NaN
+	input logic     	[12:0]		de0;			// X is NaN NaN
+	input logic     			xnanM;			// X is NaN
+	input logic    			ynanM;			// Y is NaN
+	input logic     			znanM;			// Z is NaN 
+	input logic				expplus1;
+	input logic     			specialsel;  	// Select special result
+	output logic		[62:52]    	wexp;           	// Exponent of result
+	output logic				sumof;          // X*Y+Z exponent out of bounds 
+	output logic				sumuf;         // X*Y+Z exponent underflows 

 	//   Internal nodes

--- a/wally-pipelined/src/fpu/flag1.sv
+++ b/wally-pipelined/src/fpu/flag1.sv
@ -11,15 +11,15 @@
 module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE);
 /////////////////////////////////////////////////////////////////////////////

-	input                  		xnanE;        	// X is NaN 
-	input                  		ynanE;        	// Y is NaN 
-	input                 		znanE;       	// Z is NaN
-	input                  		prodof;         // X*Y overflows exponent
-	output				nanE;		// Some	source is NaN
+	input logic                  		xnanE;        	// X is NaN 
+	input logic                  		ynanE;        	// Y is NaN 
+	input logic                 		znanE;       	// Z is NaN
+	input logic                  		prodof;         // X*Y overflows exponent
+	output logic				nanE;		// Some	source is NaN
 
 	//   Internal nodes

-	output				prodinfE;	// X*Y larger than max possible
+	output logic				prodinfE;	// X*Y larger than max possible

 	// If any input is NaN, propagate the NaN 

--- a/wally-pipelined/src/fpu/flag2.sv
+++ b/wally-pipelined/src/fpu/flag2.sv
@ -13,27 +13,27 @@ module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof,
 			 inf, nanM, FmaFlagsM,sticky,prodinfM);
 /////////////////////////////////////////////////////////////////////////////

-	input                  		xnanM;        	// X is NaN 
-	input                  		ynanM;        	// Y is NaN 
-	input                 		znanM;       	// Z is NaN 
-	input				xsign; 		// Sign of z
-	input				ysign; 		// Sign of z
-	input				zsign; 		// Sign of z
-	input                  		sticky;        	// X is Inf
-    input                       prodinfM;
-	input                  		xinfM;        	// X is Inf
-	input                 		yinfM;       	// Y is Inf 
-	input                  		zinfM;        	// Z is Inf
-	input                  		sumof;          // X*Y + z underflows exponent
-	input                  		sumuf;          // X*Y + z underflows exponent
-	input				xzeroM;		// x = 0
-	input				yzeroM;		// y = 0
-	input				zzeroM;		// y = 0
-	input				killprodM;
-	input     	[1:0]  		vbits;		// R and S bits of result
-	output				inf;		// Some	source is Inf
-	output				nanM;		// Some	source is NaN
-	output		[4:0]	FmaFlagsM;
+	input logic                 		xnanM;        	// X is NaN 
+	input logic                 		ynanM;        	// Y is NaN 
+	input logic                		znanM;       	// Z is NaN 
+	input logic				xsign; 		// Sign of x
+	input logic			ysign; 		// Sign of y
+	input logic			zsign; 		// Sign of z
+	input logic                 		sticky;        	// sticky bit
+    input     logic                  prodinfM;
+	input logic                 		xinfM;        	// X is Inf
+	input logic                		yinfM;       	// Y is Inf 
+	input logic                 		zinfM;        	// Z is Inf
+	input logic                 		sumof;          // X*Y + z underflows exponent
+	input logic                 		sumuf;          // X*Y + z underflows exponent
+	input logic				xzeroM;		// x = 0
+	input logic				yzeroM;		// y = 0
+	input logic				zzeroM;		// y = 0
+	input logic				killprodM;
+	input logic     	[1:0]  		vbits;		// R and S bits of result
+	output logic				inf;		// Some	source is Inf
+	input logic				nanM;		// Some	source is NaN
+	output logic		[4:0]	FmaFlagsM;
 
 	//   Internal nodes

--- a/wally-pipelined/src/fpu/fma1.sv
+++ b/wally-pipelined/src/fpu/fma1.sv
@ -34,34 +34,34 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
 			, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
 			xinfE, yinfE, zinfE, nanE, prodinfE);
 /////////////////////////////////////////////////////////////////////////////
- 
-	input 		[63:0]		ReadData1E;		// input 1
-	input		[63:0]		ReadData2E;     // input 2 
-	input 		[63:0]		ReadData3E;     // input 3
-	input 		[2:0]	 	FrmE;          	// Rounding mode
-	output 		[12:0]		aligncntE;    	// status flags
-	output 		[105:0]		rE; 				// one result of partial product sum
-	output 		[105:0]		sE; 				// other result of partial products
-	output 		[163:0]		tE;				// output of alignment shifter	
-	output 		[12:0]		aeE; 		// multiplier expoent
-	output 					bsE;				// sticky bit of addend
-	output 					killprodE; 		// ReadData3E >> product
-	output					xzeroE;
-	output					yzeroE;
-	output					zzeroE;
-	output					xdenormE;
-	output					ydenormE;
-	output					zdenormE;
-	output					xinfE;
-	output					yinfE;
-	output					zinfE;
-	output					xnanE;
-	output					ynanE;
-	output					znanE;
-	output					nanE;
-	output					prodinfE;
-	output			[8:0]		sumshiftE;
-	output					sumshiftzeroE;
+ //***clean up code, comment, fix names, and c3f000200003fffe * 0000000000000001 + 001ffffffffffffe error
+	input logic 		[63:0]		ReadData1E;		// input 1
+	input logic		[63:0]		ReadData2E;     // input 2 
+	input logic 		[63:0]		ReadData3E;     // input 3
+	input logic 		[2:0]	 	FrmE;          	// Rounding mode
+	output logic 		[12:0]		aligncntE;    	// status flags
+	output logic 		[105:0]		rE; 				// one result of partial product sum
+	output logic 		[105:0]		sE; 				// other result of partial products
+	output logic 		[163:0]		tE;				// output of alignment shifter	
+	output logic 		[12:0]		aeE; 		// multiplier expoent
+	output logic 					bsE;				// sticky bit of addend
+	output logic 					killprodE; 		// ReadData3E >> product
+	output logic					xzeroE;
+	output logic					yzeroE;
+	output logic					zzeroE;
+	output logic					xdenormE;
+	output logic					ydenormE;
+	output logic					zdenormE;
+	output logic					xinfE;
+	output logic					yinfE;
+	output logic					zinfE;
+	output logic					xnanE;
+	output logic					ynanE;
+	output logic					znanE;
+	output logic					nanE;
+	output logic					prodinfE;
+	output logic			[8:0]		sumshiftE;
+	output logic					sumshiftzeroE;

 // Internal nodes
 
--- a/wally-pipelined/src/fpu/fma2.sv
+++ b/wally-pipelined/src/fpu/fma2.sv
@ -38,40 +38,37 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,

 );
 /////////////////////////////////////////////////////////////////////////////
- 
-	input 		[63:0]		ReadData1M;		// input 1
-	input		[63:0]		ReadData2M;     // input 2 
-	input 		[63:0]		ReadData3M;     // input 3
-	input 		[2:0]	 	FrmM;          	// Rounding mode
-	input 		[12:0]		aligncntM;    	// status flags
-	input 		[105:0]		rM; 				// one result of partial product sum
-	input 		[105:0]		sM; 				// other result of partial products
-	input 		[163:0]		tM;				// output of alignment shifter	
-	input 		[8:0]		normcntM; 		// shift count for normalizer
-	input 		[12:0]		aeM; 		// multiplier expoent
-	input 					bsM;				// sticky bit of addend
-	input 					killprodM; 		// ReadData3M >> product
-	input					prodinfM;
-	input					xzeroM;
-	input					yzeroM;
-	input					zzeroM;
-	input					xdenormM;
-	input					ydenormM;
-	input					zdenormM;
-	input					xinfM;
-	input					yinfM;
-	input					zinfM;
-	input					xnanM;
-	input					ynanM;
-	input					znanM;
-	input					nanM;
-	input			[8:0]		sumshiftM;
-	input					sumshiftzeroM;
-
-
-	input 		[63:0]		FmaResultM;     // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
-	output 		[4:0]		FmaFlagsM;    	// status flags
-	
+  
+	input logic	 	[63:0]		ReadData1M;		// input 1
+	input logic	 	[63:0]		ReadData2M;     // input 2 
+	input logic		[63:0]		ReadData3M;     // input 3
+	input logic		[2:0]	 	FrmM;          	// Rounding mode
+	input logic		[12:0]		aligncntM;    	// status flags
+	input logic 	[105:0]		rM; 				// one result of partial product sum
+	input logic		[105:0]		sM; 				// other result of partial products
+	input logic		[163:0]		tM;				// output of alignment shifter	
+	input logic		[8:0]		normcntM; 		// shift count for normalizer
+	input logic		[12:0]		aeM; 		// multiplier expoent
+	input logic					bsM;				// sticky bit of addend
+	input logic 				killprodM; 		// ReadData3M >> product
+	input logic					prodinfM;
+	input logic					xzeroM;
+	input logic					yzeroM;
+	input logic					zzeroM;
+	input logic					xdenormM;
+	input logic					ydenormM;
+	input logic					zdenormM;
+	input logic					xinfM;
+	input logic					yinfM;
+	input logic					zinfM;
+	input logic					xnanM;
+	input logic					ynanM;
+	input logic					znanM;
+	input logic					nanM;
+	input logic		[8:0]		sumshiftM;
+	input logic					sumshiftzeroM;
+	output logic		[63:0]		FmaResultM;     // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
+	output logic		[4:0]		FmaFlagsM;    	// status flags

 // Internal nodes
 	logic 		[163:0]		sum;			// output of carry prop adder
--- a/wally-pipelined/src/fpu/fpucmp1.sv
+++ b/wally-pipelined/src/fpu/fpucmp1.sv
@ -208,7 +208,6 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
   output logic 	      BNaN;
   output logic               Azero;
   output logic               Bzero;
-   logic [62:0]       sixtythreezeros = 63'h0;

   assign dp = !Sel[1]&!Sel[0];
   assign sp = !Sel[1]&Sel[0];
@ -229,7 +228,7 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
   // the 63 least siginficant bits of A are zero). 
   // Depending on how this synthesizes, it may work better to replace
   // this with assign Azero = ~(A[62] | A[61] | ... | A[0])
-   assign Azero = (A[62:0] == sixtythreezeros);
-   assign Bzero = (B[62:0] == sixtythreezeros);
+   assign Azero = (A[62:0] == 63'h0);
+   assign Bzero = (B[62:0] == 63'h0);

 endmodule // exception_cmp
--- a/wally-pipelined/src/fpu/lza.sv
+++ b/wally-pipelined/src/fpu/lza.sv
@ -12,22 +12,21 @@
 module lza(sum, normcnt, sumzero); 
 /////////////////////////////////////////////////////////////////////////////
 
-	input     	[163:0]  	sum;            // sum
-	output     	[8:0]		normcnt;		// normalization shift count
-	output     		  		sumzero;		// sum = 0
+	input logic     	[163:0]  	sum;            // sum
+	output logic     	[8:0]		normcnt;		// normalization shift count
+	output logic     		  		sumzero;		// sum = 0

 	// Internal nodes

 	reg			[8:0] 		i;				// loop index
-	reg			[8:0] 		normcnt;		// normalization shift count
- 
+	
 	// A real LOP uses a fast carry chain to find only the first 0.
 	// It is an example of a parallel prefix algorithm.  For the sake
 	// of simplicity,  this model is behavioral instead.
 	// A real LOP would also operate on the sources of the adder, not
 	// the result!

-	always @ ( sum)
+	always_comb
 		begin
 			i =   0;
 			while (~sum[163-i] && i <= 163) i = i+1;  // search for leading one 
--- a/wally-pipelined/src/fpu/multiply.sv
+++ b/wally-pipelined/src/fpu/multiply.sv
@ -2,14 +2,14 @@
 module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE); 
 /////////////////////////////////////////////////////////////////////////////

-	input 		[51:0]		xman;				// Fraction of multiplicand	x
-	input		[51:0]		yman;				// Fraction of multiplicand y	
-	input					xdenormE;		// is x denormalized	
-	input					ydenormE;		// is y denormalized	
-	input     			xzeroE;		// Z is denorm
-	input     			yzeroE;		// Z is denorm
-	output		[105:0]		rE;				//	partial product 1	
-	output		[105:0]		sE;				//	partial product 2	
+	input logic 		[51:0]		xman;				// Fraction of multiplicand	x
+	input logic		[51:0]		yman;				// Fraction of multiplicand y	
+	input logic					xdenormE;		// is x denormalized	
+	input logic					ydenormE;		// is y denormalized	
+	input logic     			xzeroE;		// Z is denorm
+	input logic     			yzeroE;		// Z is denorm
+	output logic		[105:0]		rE;				//	partial product 1	
+	output logic		[105:0]		sE;				//	partial product 2	
    
     wire        [54:0]      yExt; //y with appended 0 and assumed 1
     wire        [53:0]      xExt; //y with assumed 1
--- a/wally-pipelined/src/fpu/normalize.sv
+++ b/wally-pipelined/src/fpu/normalize.sv
@ -17,35 +17,31 @@
 module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero, 
 				xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v); 
 /////////////////////////////////////////////////////////////////////////////
-	input     	[163:0]  	sum;            // sum
-	input     	[62:52]  	zexp;            // sum
-	input		[8:0] 		normcnt;     	// normalization shift count
-	input		[12:0] 		aeM;     	// normalization shift count
-	input		[12:0] 		aligncntM;     	// normalization shift count
-	input		[8:0] 		sumshiftM;     	// normalization shift count
-	input				sumshiftzeroM;
-	input				sumzero;	// sum is zero
-	input				bsM;		// sticky bit for addend
-	input                  		xdenormM;        // Input Z is denormalized
-	input                  		ydenormM;        // Input Z is denormalized
-	input                  		zdenormM;        // Input Z is denormalized
-	input				xzeroM;
-	input				yzeroM;
-	input				zzeroM;
-	output				sticky;		//sticky bit
-	output		[12:0]		de0;
-	output                  	resultdenorm;        // Input Z is denormalized
-	output		[53:0]		v;		// normalized sum, R, S bits
+	input logic     	[163:0]  	sum;            // sum
+	input logic     	[62:52]  	zexp;            // sum
+	input logic		[8:0] 		normcnt;     	// normalization shift count
+	input logic		[12:0] 		aeM;     	// normalization shift count
+	input logic		[12:0] 		aligncntM;     	// normalization shift count
+	input logic		[8:0] 		sumshiftM;     	// normalization shift count
+	input logic				sumshiftzeroM;
+	input logic				sumzero;	// sum is zero
+	input logic				bsM;		// sticky bit for addend
+	input logic                  		xdenormM;        // Input Z is denormalized
+	input logic                  		ydenormM;        // Input Z is denormalized
+	input logic                  		zdenormM;        // Input Z is denormalized
+	input logic				xzeroM;
+	input logic				yzeroM;
+	input logic				zzeroM;
+	output logic				sticky;		//sticky bit
+	output logic		[12:0]		de0;
+	output logic                  	resultdenorm;        // Input Z is denormalized
+	output logic		[53:0]		v;		// normalized sum, R, S bits

 	// Internal nodes

-	reg       	[53:0]     	v;           	// normalized sum, R, S bits 
-	logic                  	resultdenorm;        // Input Z is denormalized
-	logic 		[12:0]	de0;
-	logic       	[163:0]  	sumshifted;     // shifted sum
+logic       	[163:0]  	sumshifted;     // shifted sum
 	logic		[9:0]		sumshifttmp;
 	logic       	[163:0]  	sumshiftedtmp;     // shifted sum
-	logic 				sticky;
 	logic				isShiftLeft1;
 logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;

@ -62,7 +58,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 
 	assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
 	assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
-	always @(sum or sumshiftM or aeM or aligncntM or normcnt or bsM or isShiftLeft1 or zexp or zdenormM)
+	always_comb
 		begin
 		// d = aligncntM
 		// l = normcnt
--- a/wally-pipelined/src/fpu/round.sv
+++ b/wally-pipelined/src/fpu/round.sv
@ -19,23 +19,23 @@ module round(v, sticky, FrmM, wsign,
 			  wman, infinity, specialsel,expplus1);
 /////////////////////////////////////////////////////////////////////////////

-	input		[53:0]		v;		// normalized sum, R, S bits
-	input				sticky;		//sticky bit
-	input		[2:0]	FrmM;
-	input				wsign;		// Sign of result
-	input 		[4:0]	FmaFlagsM;
-	input				inf;		// Some input is infinity
-	input				nanM;		// Some input is NaN
-	input				xnanM;		// X is NaN
-	input				ynanM;		// Y is NaN
-	input				znanM;		// Z is NaN
-	input		[51:0]		xman;		// Input X
-	input		[51:0]		yman;		// Input Y
-	input		[51:0]		zman;		// Input Z
-	output		[51:0]		wman; 		// rounded result of FMAC
-	output				infinity;    	// Generate infinity on overflow
-	output				specialsel;  	// Select special result
-	output				expplus1;
+	input logic		[53:0]		v;		// normalized sum, R, S bits
+	input logic				sticky;		//sticky bit
+	input logic		[2:0]	FrmM;
+	input logic				wsign;		// Sign of result
+	input logic 		[4:0]	FmaFlagsM;
+	input logic				inf;		// Some input is infinity
+	input logic				nanM;		// Some input is NaN
+	input logic				xnanM;		// X is NaN
+	input logic				ynanM;		// Y is NaN
+	input logic				znanM;		// Z is NaN
+	input logic		[51:0]		xman;		// Input X
+	input logic		[51:0]		yman;		// Input Y
+	input logic		[51:0]		zman;		// Input Z
+	output logic		[51:0]		wman; 		// rounded result of FMAC
+	output logic				infinity;    	// Generate infinity on overflow
+	output logic				specialsel;  	// Select special result
+	output logic				expplus1;

 	// Internal nodes

@ -56,7 +56,7 @@ module round(v, sticky, FrmM, wsign,
 	//	0xx - do nothing
 	//	100 - tie - plus1 if v[2] = 1
 	//	101/110/111 - plus1
-	always @ (FrmM, v, wsign, sticky) begin
+	always_comb begin
 		case (FrmM)
 			3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
 			3'b001: plus1 = 0;//round to zero
--- a/wally-pipelined/src/fpu/sbtm.sv
+++ b/wally-pipelined/src/fpu/sbtm.sv
@ -11,7 +11,8 @@ module sbtm (input logic [11:0] a, output logic [10:0] ia_out);
   // input to CPA
   logic [14:0] op1;
   logic [14:0] op2;
-   logic [14:0] p;   
+   logic [14:0] p; 
+   logic cout;  

   assign x0 = a[10:7];
   assign x1 = a[6:4];
--- a/wally-pipelined/src/fpu/sign.sv
+++ b/wally-pipelined/src/fpu/sign.sv
@ -14,32 +14,28 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
 			 sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
 ////////////////////////////////////////////////////////////////////////////I
 
-	input					xsign;			// Sign of X 
-	input					ysign;			// Sign of Y 
-	input					zsign;			// Sign of Z
-	input					isAdd;
-	input					negsum0;		// Sum in +O mode is negative 
-	input					negsum1;		// Sum in +1 mode is negative 
-	input					bsM;				// sticky bit from addend
-	input		[2:0]		FrmM;				// Round toward minus infinity
-	input		[4:0]		FmaFlagsM;				// Round toward minus infinity
-	input					sumzero;		// Sum = O
-	input					zinfM;			// Y = Inf
-	input					inf;			// Some input = Inf
-	output					wsign;			// Sign of W 
-	output					invz;			// Invert addend into adder
-	output					negsum;			// Negate result of adder
-	output					selsum1;		// Select +1 mode from compound adder
+	input logic					xsign;			// Sign of X 
+	input logic					ysign;			// Sign of Y 
+	input logic					zsign;			// Sign of Z
+	input logic					isAdd;
+	input logic					negsum0;		// Sum in +O mode is negative 
+	input logic					negsum1;		// Sum in +1 mode is negative 
+	input logic					bsM;				// sticky bit from addend
+	input logic		[2:0]		FrmM;				// Round toward minus infinity
+	input logic		[4:0]		FmaFlagsM;				// Round toward minus infinity
+	input logic					sumzero;		// Sum = O
+	input logic					zinfM;			// Y = Inf
+	input logic					inf;			// Some input = Inf
+	output logic					wsign;			// Sign of W 
+	output logic					invz;			// Invert addend into adder
+	output logic					negsum;			// Negate result of adder
+	output logic					selsum1;		// Select +1 mode from compound adder
 
 	// Internal nodes

 	wire					zerosign;    	// sign if result= 0 
 	wire					sumneg;    	// sign if result= 0 
 	wire					infsign;     	// sign if result= Inf 
-	reg						negsum;         // negate result of adder 
-	reg						selsum1;     	// select +1 mode from compound adder 
-logic tmp;
-
 	// Compute sign of product 

 	assign psign = xsign ^ ysign;
--- a/wally-pipelined/src/fpu/special.sv
+++ b/wally-pipelined/src/fpu/special.sv
@ -14,21 +14,21 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
 				xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
 /////////////////////////////////////////////////////////////////////////////

-	input   	[63:0]     	ReadData1E;              // Input ReadData1E
-	input     	[63:0]     	ReadData2E;           	// Input ReadData2E
-	input      	[63:0]    	ReadData3E;            	// Input ReadData3E 
-	output				xzeroE;		// Input ReadData1E = 0
-	output				yzeroE;		// Input ReadData2E = 0
-	output				zzeroE;		// Input ReadData3E = 0
-	output				xnanE;		// ReadData1E is NaN
-	output				ynanE;		// ReadData2E is NaN
-	output				znanE;		// ReadData3E is NaN
-	output				xdenormE;	// ReadData1E is denormalized
-	output				ydenormE;	// ReadData2E is denormalized
-	output				zdenormE;	// ReadData3E is denormalized
-	output				xinfE;		// ReadData1E is infinity
-	output				yinfE;		// ReadData2E is infinity
-	output				zinfE;		// ReadData3E is infinity
+	input logic   	[63:0]     	ReadData1E;              // Input ReadData1E
+	input logic     	[63:0]     	ReadData2E;           	// Input ReadData2E
+	input logic      	[63:0]    	ReadData3E;            	// Input ReadData3E 
+	output logic				xzeroE;		// Input ReadData1E = 0
+	output logic				yzeroE;		// Input ReadData2E = 0
+	output logic				zzeroE;		// Input ReadData3E = 0
+	output logic				xnanE;		// ReadData1E is NaN
+	output logic				ynanE;		// ReadData2E is NaN
+	output logic				znanE;		// ReadData3E is NaN
+	output logic				xdenormE;	// ReadData1E is denormalized
+	output logic				ydenormE;	// ReadData2E is denormalized
+	output logic			zdenormE;	// ReadData3E is denormalized
+	output logic				xinfE;		// ReadData1E is infinity
+	output logic				yinfE;		// ReadData2E is infinity
+	output logic				zinfE;		// ReadData3E is infinity

 	// In the actual circuit design, the gates looking at bits
 	// 51:0 and at bits 62:52 should be shared among the various detectors.