Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main

2021-05-03 09:23:52 -04:00 · 2021-05-03 09:23:52 -04:00 · 2368b58cc9
commit 2368b58cc9
parent 1fcd43e844 b98bc89f76
57 changed files with 1161 additions and 836 deletions
--- a/wally-pipelined/src/fpu/FMA/add.sv
+++ b/wally-pipelined/src/fpu/FMA/add.sv
@ -15,16 +15,16 @@ module add(rM, sM, tM, sum,
 		   negsum, invz, selsum1, negsum0, negsum1, killprodM);
 ////////////////////////////////////////////////////////////////////////////////
-	input 		[105:0]		rM;     			// partial product 1
+	input logic 		[105:0]		rM;     			// partial product 1
-	input 		[105:0]		sM;              // partial product 2
+	input logic 		[105:0]		sM;              // partial product 2
-	input 		[163:0]		tM;             	// aligned addend 
+	input logic 		[163:0]		tM;             	// aligned addend 
-	input					invz;       	// invert addend
+	input logic					invz;       	// invert addend
-	input 					selsum1;    	// select +1 mode of compound adder 
+	input logic 					selsum1;    	// select +1 mode of compound adder 
-	input					killprodM;    	// z >> product
+	input logic					killprodM;    	// z >> product
-	input					negsum;      	// Negate sum 
+	input logic					negsum;      	// Negate sum 
-	output		[163:0]		sum;         	// sum
+	output logic		[163:0]		sum;         	// sum
-	output					negsum0;     	// sum was negative in +0 mode
+	output logic					negsum0;     	// sum was negative in +0 mode
-	output					negsum1;     	// sum was negative in +1 mode 
+	output logic					negsum1;     	// sum was negative in +1 mode 
 	// Internal nodes
@ -34,6 +34,7 @@ module add(rM, sM, tM, sum,
 	wire		[164:0] 	sum0;			// sum of compound adder +0 mode
 	wire		[164:0] 	sum1;			// sum of compound adder +1 mode
 	wire		[163:0] 	prodshifted;			// sum of compound adder +1 mode
 	wire		[164:0] 	tmp;			// sum of compound adder +1 mode
 	// Invert addend if z'sM sign is diffrent from the product'sM sign
@ -44,11 +45,13 @@ module add(rM, sM, tM, sum,
 	assign r2 = killprodM ? 106'b0 : rM;
 	assign s2 = killprodM ? 106'b0 : sM;
 	//***replace this with a more structural cpa that synthisises better
 	// Compound adder
 	// Consists of 3:2 CSA followed by long compound CPA
-	assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
+	//assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
-	assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
+	//assign tmp = ({{57{r2[105]}},r2, 2'b0} + {{57{s2[105]}},s2, 2'b0});
-	assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above
+	assign sum0 = t2 + 164'b0 + {57'b0, r2+s2, 2'b0};
 	assign sum1 = t2 + 164'b1 + {57'b0, r2+s2, 2'b0}; // +1 from invert of z above
 	// Check sign bits in +0/1 modes 
 	assign negsum0 = sum0[164];
@ -59,3 +62,4 @@ module add(rM, sM, tM, sum,
 	assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]);
 endmodule
--- a/wally-pipelined/src/fpu/FMA/align.sv
+++ b/wally-pipelined/src/fpu/FMA/align.sv
@ -15,79 +15,63 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
             killprodE,  sumshiftE, sumshiftzeroE);
 /////////////////////////////////////////////////////////////////////////////
-	input 		[51:0]		zman;		// Fraction of addend z;
+	input logic 		[51:0]		zman;		// Fraction of addend z;
-	input 		[12:0]		aligncntE;	// amount to shift
+	input logic 		[12:0]		aligncntE;	// amount to shift
-	input				xzeroE;		// Input X = 0
+	input logic				xzeroE;		// Input X = 0
-	input                  		yzeroE;          // Input Y = 0 
+	input logic                  		yzeroE;          // Input Y = 0 
-	input                  		zzeroE;          // Input Z = 0
+	input logic                  		zzeroE;          // Input Z = 0
-	input                  		zdenormE;        // Input Z is denormalized
+	input logic                  		zdenormE;        // Input Z is denormalized
-	output    	[163:0]    	tE;              // aligned addend (54 bits left of bpt)
+	output logic    	[163:0]    	tE;              // aligned addend (54 bits left of bpt)
-	output          		bsE;           	// sticky bit of addend
+	output logic          		bsE;           	// sticky bit of addend
-	output          		killprodE;    	// Z >> product
+	output logic          		killprodE;    	// Z >> product
-	output		[7:0]		sumshiftE;	
+	output logic		[8:0]		sumshiftE;	
-	output				sumshiftzeroE;
+	output logic				sumshiftzeroE;
 	// Internal nodes
 	reg       	[163:0]   	tE;				// aligned addend from shifter
 	reg       	[215:0]   	shift;				// aligned addend from shifter
-	reg             		killprodE;			// Z >> product 
+	logic 		[12:0]		tmp;
 	reg             		bsE;				// sticky bit of addend
 	reg             		ps;				// sticky bit of product
 	reg             		zexpsel;				// sticky bit of product
 	reg       	[7:0]		i;				// temp storage for finding sticky bit
 	wire		[52:0]		z1;				// Z plus 1
 	wire		[51:0]		z2;				// Z selected after handling rounds
 	wire		[11:0]		align104;			// alignment count + 104
 	logic		[8:0]		sumshiftE;
 	logic sumshiftzeroE;
-	// Compute sign of aligncntE + 104 to check for shifting too far right 
+	always_comb 
 	//assign align104 = aligncntE+104;
 	// Shift addend by alignment count.  Generate sticky bits from
 	// addend on right shifts.  Handle special cases of shifting
 	// by too much.
 	always @(aligncntE or xzeroE or yzeroE or zman or zdenormE or zzeroE)
 		begin
 		// Default to clearing sticky bits 
 		bsE = 0;
 		ps = 0;
 		// And to using product as primary operand in adder I exponent gen 
 		killprodE = xzeroE | yzeroE;
 		// d = aligncntE
 		// p = 53
-		if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1
+		//***try reducing this hardware to use one shifter
 		if ($signed(aligncntE) <= $signed(-(13'd105))) begin //d<=-2p+1
 			//product ancored case with saturated shift
 			sumshiftE = 163;	// 3p+4	
 			sumshiftzeroE = 0;
 			shift = {1'b1,zman,163'b0} >> sumshiftE;
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
-			//zexpsel = 0;
+
-		end else if($signed(aligncntE) <= $signed(2))  begin // -2p+1<d<=2
+		end else if($signed(aligncntE) <= $signed(13'd2))  begin // -2p+1<d<=2
 			// product ancored or cancellation
-			sumshiftE = 57-aligncntE; // p + 2 - d  
+			tmp = 13'd57-aligncntE;
 			sumshiftE = tmp[8:0]; // p + 2 - d  
 			sumshiftzeroE = 0;
 			shift = {~zdenormE,zman,163'b0} >> sumshiftE;
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
-			//zexpsel = 0;
+
-		end else if ($signed(aligncntE)<=$signed(55))  begin // 2 < d <= p+2
+		end else if ($signed(aligncntE)<=$signed(13'd55))  begin // 2 < d <= p+2
 			// addend ancored case
-			// used to be 56 \/ somthing doesn'tE seem right too many typos
+			// used to be 56 \/ something doesn't seem right too many typos
-			sumshiftE = 57-aligncntE;
+			tmp = 13'd57-aligncntE;
 			sumshiftE = tmp[8:0]; 
 			sumshiftzeroE = 0;
 			shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
-			//zexpsel = 1;
+
 		end else begin                 	// d >= p+3
 			// addend anchored case with saturated shift
 			sumshiftE = 0;	
@ -96,15 +80,9 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
 			killprodE = 1;
 			//ps = 1;
 			//zexpsel = 1;
 		// use some behavioral code to find sticky bit.  This is really
 		// done by hardware in the shifter.
 		//if (aligncntE < 0)
 		//	for (i=0; i<-aligncntE-52;  i = i+1)
 		//		bsE = bsE || z2[i];
 		end 
 	end
 endmodule
--- a/wally-pipelined/src/fpu/FMA/booth.sv
+++ b/wally-pipelined/src/fpu/FMA/booth.sv
@ -1,21 +1,19 @@
 module booth(xExt, choose, add1, e, pp); 
 /////////////////////////////////////////////////////////////////////////////
-	input 		[53:0]		xExt;				// multiplicand	xExt
+	input logic 		[53:0]		xExt;				// multiplicand	xExt
-	input		[2:0]		choose;				// bits needed to choose which encoding
+	input logic		[2:0]		choose;				// bits needed to choose which encoding
-	output		[1:0]       	add1;				// do you add 1	
+	output logic		[1:0]       	add1;				// do you add 1	
-    output                  e;
+    output logic                  e;
-	output		[54:0]		pp;				//	the resultant encoding
+	output logic		[54:0]		pp;				//	the resultant encoding
-    logic [54:0] pp, temp;
+    logic [54:0] temp;
    logic e;
    logic [1:0] add1;
    logic [53:0] negx;
    //logic temp;
    assign negx = ~xExt;
-    always @(choose, xExt, negx)
+    always_comb
    case (choose)
        3'b000 : pp = 55'b0;   //  0
        3'b001 : pp = {1'b0, xExt};  //  1
@ -24,10 +22,10 @@ module booth(xExt, choose, add1, e, pp);
        3'b100 : pp = {negx, 1'b0};  // -2
        3'b101 : pp = {1'b1, negx};  // -1
        3'b110 : pp = {1'b1, negx};  // -1
-        3'b111 : pp = 55'hfffffffffffffff;  //  -0
+        3'b111 : pp = '1;  //  -0
    endcase
-    always @(choose, xExt, negx)
+    always_comb
    case (choose)
        3'b000 : e = 0;   //  0
        3'b001 : e = 0;  //  1
@ -40,7 +38,7 @@ module booth(xExt, choose, add1, e, pp);
    endcase
    // assign add1 = (choose[2] == 1'b1) ? ((choose[1:0] == 2'b11) ? 1'b0 : 1'b1) : 1'b0;
    // assign add1 = choose[2];
-    always @(choose)
+    always_comb
    case (choose)
        3'b000 : add1 = 2'b0;   //  0
        3'b001 : add1 = 2'b0;  //  1
--- a/wally-pipelined/src/fpu/FMA/compressors.sv
+++ b/wally-pipelined/src/fpu/FMA/compressors.sv
@ -3,11 +3,11 @@ module add3comp2(a, b, c, carry, sum);
 //look into diffrent implementations of the compressors?
    parameter BITS = 4;
-	input 		[BITS-1:0]		a;
+	input logic 		[BITS-1:0]		a;
-	input		[BITS-1:0]		b;
+	input logic		[BITS-1:0]		b;
-	input		[BITS-1:0]    	c;
+	input logic		[BITS-1:0]    	c;
-    output      [BITS-1:0]      carry;
+    output logic      [BITS-1:0]      carry;
-	output		[BITS-1:0]		sum;
+	output logic		[BITS-1:0]		sum;
    genvar i;
    generate
@ -22,12 +22,12 @@ module add4comp2(a, b, c, d, carry, sum);
 /////////////////////////////////////////////////////////////////////////////
    parameter BITS = 4;
-	input 		[BITS-1:0]		a;
+	input logic 		[BITS-1:0]		a;
-	input		[BITS-1:0]		b;
+	input logic		[BITS-1:0]		b;
-	input		[BITS-1:0]    	c;
+	input logic		[BITS-1:0]    	c;
-	input		[BITS-1:0]    	d;
+	input logic		[BITS-1:0]    	d;
-    output      [BITS:0]      carry;
+    output logic      [BITS:0]      carry;
-	output		[BITS-1:0]		sum;
+	output logic		[BITS-1:0]		sum;
    logic       [BITS-1:0]      cout;
    logic                       carryTmp;
@ -54,11 +54,11 @@ module sng3comp2(a, b, c, carry, sum);
 /////////////////////////////////////////////////////////////////////////////
 //look into diffrent implementations of the compressors?
-	input 				a;
+	input logic 				a;
-	input				b;
+	input logic				b;
-	input		       	c;
+	input logic		       	c;
-    output              carry;
+    output logic              carry;
-	output				sum;
+	output logic				sum;
    logic               axorb;
@ -73,14 +73,14 @@ module sng4comp2(a, b, c, d, cin, cout, carry, sum);
 /////////////////////////////////////////////////////////////////////////////
 //look into pass gate 4:2 counters?
-	input 				a;
+	input logic 				a;
-	input				b;
+	input logic				b;
-	input		       	c;
+	input logic		       	c;
-    input               d;
+    input logic               d;
-    input               cin;
+    input logic               cin;
-    output              cout;
+    output logic              cout;
-    output              carry;
+    output logic              carry;
-	output				sum;
+	output logic				sum;
    logic               TmpSum;
--- a/wally-pipelined/src/fpu/FMA/expgen1.sv
+++ b/wally-pipelined/src/fpu/FMA/expgen1.sv
@ -20,17 +20,17 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 			   aligncntE, prodof, aeE);
 /////////////////////////////////////////////////////////////////////////////
-	input     	[62:52]    	xexp;           	// Exponent of multiplicand x
+	input logic     	[62:52]    	xexp;           	// Exponent of multiplicand x
-	input     	[62:52]  	yexp;         		// Exponent of multiplicand y
+	input logic     	[62:52]  	yexp;         		// Exponent of multiplicand y
-	input     	[62:52]  	zexp;           	// Exponent of addend z
+	input logic     	[62:52]  	zexp;           	// Exponent of addend z
-	input     			xdenormE;		// Z is denorm
+	input logic     			xdenormE;		// Z is denorm
-	input     			ydenormE;		// Z is denorm
+	input logic     			ydenormE;		// Z is denorm
-	input     			zdenormE;		// Z is denorm
+	input logic     			zdenormE;		// Z is denorm
-	input     			xzeroE;		// Z is denorm
+	input logic     			xzeroE;		// Z is denorm
-	input     			yzeroE;		// Z is denorm
+	input logic     			yzeroE;		// Z is denorm
-	output		[12:0]   	aligncntE;       // shift count for alignment shifter
+	output logic		[12:0]   	aligncntE;       // shift count for alignment shifter
-	output				prodof;         // X*Y exponent out of bounds 
+	output logic			prodof;         // X*Y exponent out of bounds 
-	output		[12:0]		aeE;				//exponent of multiply
+	output logic		[12:0]		aeE;				//exponent of multiply
 	//   Internal nodes
@ -50,7 +50,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 	//   if exponent is out of bounds 
-	assign aeE = xzeroE|yzeroE ? 0 : xexp + yexp -1023;
+	assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;
 	assign prodof = (aeE > 2046 && ~aeE[12]);
@ -61,7 +61,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 	// is masked by the bypass mux and two 10 bit adder delays.
 	// assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
 	// assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
-	assign aligncntE = zexp -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
+	assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
 	//assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
 	//assign aligncntE = zexp - aeE;// KEP use all of aeE
@ -87,3 +87,4 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 	// rounding mode.  NaNs are propagated or generated.
 endmodule
--- a/wally-pipelined/src/fpu/FMA/expgen2.sv
+++ b/wally-pipelined/src/fpu/FMA/expgen2.sv
@ -23,24 +23,24 @@ module expgen2(xexp, yexp, zexp,
 			   sumof, sumuf);
 /////////////////////////////////////////////////////////////////////////////
-	input     	[62:52]    	xexp;           	// Exponent of multiplicand x
+	input logic     	[62:52]    	xexp;           	// Exponent of multiplicand x
-	input     	[62:52]  	yexp;         		// Exponent of multiplicand y
+	input logic     	[62:52]  	yexp;         		// Exponent of multiplicand y
-	input     	[62:52]  	zexp;           	// Exponent of addend z
+	input logic     	[62:52]  	zexp;           	// Exponent of addend z
-	input     			sumzero;     	// sum exactly equals zero 
+	input logic     			sumzero;     	// sum exactly equals zero 
-	input     			resultdenorm;  // postnormalize rounded result
+	input logic     			resultdenorm;  // postnormalize rounded result
-	input     			infinity;    	// generate infinity on overflow 
+	input logic     			infinity;    	// generate infinity on overflow 
-	input     	[4:0]	FmaFlagsM;     	// Result invalid
+	input logic     	[4:0]	FmaFlagsM;     	// Result invalid
-	input     			inf;			// Some input is infinity
+	input logic     			inf;			// Some input is infinity
-	input     			nanM;			// Some input is NaN
+	input logic     			nanM;			// Some input is NaN
-	input     	[12:0]		de0;			// X is NaN NaN
+	input logic     	[12:0]		de0;			// X is NaN NaN
-	input     			xnanM;			// X is NaN
+	input logic     			xnanM;			// X is NaN
-	input     			ynanM;			// Y is NaN
+	input logic    			ynanM;			// Y is NaN
-	input     			znanM;			// Z is NaN 
+	input logic     			znanM;			// Z is NaN 
-	input				expplus1;
+	input logic				expplus1;
-	input     			specialsel;  	// Select special result
+	input logic     			specialsel;  	// Select special result
-	output		[62:52]    	wexp;           	// Exponent of result
+	output logic		[62:52]    	wexp;           	// Exponent of result
-	output				sumof;          // X*Y+Z exponent out of bounds 
+	output logic				sumof;          // X*Y+Z exponent out of bounds 
-	output				sumuf;         // X*Y+Z exponent underflows 
+	output logic				sumuf;         // X*Y+Z exponent underflows 
 	//   Internal nodes
@ -102,6 +102,7 @@ module expgen2(xexp, yexp, zexp,
 	// A mux selects the early result from other FPU blocks or the 
 	// normalized FMAC result.   Special cases are also detected. 
-	assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1; 
+	assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1}; 
 endmodule
--- a/wally-pipelined/src/fpu/FMA/flag1.sv
+++ b/wally-pipelined/src/fpu/FMA/flag1.sv
@ -11,17 +11,17 @@
 module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE);
 /////////////////////////////////////////////////////////////////////////////
-	input                  		xnanE;        	// X is NaN 
+	input logic                  		xnanE;        	// X is NaN 
-	input                  		ynanE;        	// Y is NaN 
+	input logic                  		ynanE;        	// Y is NaN 
-	input                 		znanE;       	// Z is NaN
+	input logic                 		znanE;       	// Z is NaN
-	input                  		prodof;         // X*Y overflows exponent
+	input logic                  		prodof;         // X*Y overflows exponent
-	output				nanE;		// Some	source is NaN
+	output logic				nanE;		// Some	source is NaN
 	//   Internal nodes
-	output				prodinfE;	// X*Y larger than max possible
+	output logic				prodinfE;	// X*Y larger than max possible
-	// If any input is NaN, propagate the NaN 
+	// If any input is NaN, propagate the NaN 
 	assign nanE = xnanE || ynanE || znanE;
--- a/wally-pipelined/src/fpu/FMA/flag2.sv
+++ b/wally-pipelined/src/fpu/FMA/flag2.sv
@ -13,27 +13,27 @@ module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof,
 			 inf, nanM, FmaFlagsM,sticky,prodinfM);
 /////////////////////////////////////////////////////////////////////////////
-	input                  		xnanM;        	// X is NaN 
+	input logic                  		xnanM;        	// X is NaN 
-	input                  		ynanM;        	// Y is NaN 
+	input logic                  		ynanM;        	// Y is NaN 
-	input                 		znanM;       	// Z is NaN 
+	input logic                 		znanM;       	// Z is NaN 
-	input				xsign; 		// Sign of z
+	input logic				xsign; 		// Sign of z
-	input				ysign; 		// Sign of z
+	input logic				ysign; 		// Sign of z
-	input				zsign; 		// Sign of z
+	input logic				zsign; 		// Sign of z
-	input                  		sticky;        	// X is Inf
+	input logic                  		sticky;        	// X is Inf
-    input                       prodinfM;
+    input logic                       prodinfM;
-	input                  		xinfM;        	// X is Inf
+	input logic                  		xinfM;        	// X is Inf
-	input                 		yinfM;       	// Y is Inf 
+	input logic                 		yinfM;       	// Y is Inf 
-	input                  		zinfM;        	// Z is Inf
+	input logic                  		zinfM;        	// Z is Inf
-	input                  		sumof;          // X*Y + z underflows exponent
+	input logic                  		sumof;          // X*Y + z underflows exponent
-	input                  		sumuf;          // X*Y + z underflows exponent
+	input logic                  		sumuf;          // X*Y + z underflows exponent
-	input				xzeroM;		// x = 0
+	input logic				xzeroM;		// x = 0
-	input				yzeroM;		// y = 0
+	input logic				yzeroM;		// y = 0
-	input				zzeroM;		// y = 0
+	input logic				zzeroM;		// y = 0
-	input				killprodM;
+	input logic				killprodM;
-	input     	[1:0]  		vbits;		// R and S bits of result
+	input logic     	[1:0]  		vbits;		// R and S bits of result
-	output				inf;		// Some	source is Inf
+	output logic				inf;		// Some	source is Inf
-	output				nanM;		// Some	source is NaN
+	input logic				nanM;		// Some	source is NaN
-	output		[4:0]	FmaFlagsM;
+	output logic		[4:0]	FmaFlagsM;
 	//   Internal nodes
@ -55,8 +55,8 @@ logic suminf;
 	assign FmaFlagsM[2] = suminf && ~inf;
 	// Set the underflow  flag for the following cases:
-	//   1) Any input is denormalized
+	//   1) Any input is denormalized
-	//   2)  Output would be denormalized or smaller
+	//   2)  Output would be denormalized or smaller
 	assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));
@ -70,7 +70,7 @@ logic suminf;
 	// Set invalid flag for following cases:
 	//   1) Inf - Inf
 	//   2) 0 * Inf
-	//   3) Output = NaN (this is not part of the IEEE spec,  only 486 proj)
+	//   3) Output = NaN (this is not part of the IEEE spec,  only 486 proj)
 	assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
 					   xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above
--- a/wally-pipelined/src/fpu/FMA/fma1.sv
+++ b/wally-pipelined/src/fpu/FMA/fma1.sv
@ -35,37 +35,37 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
 			xinfE, yinfE, zinfE, nanE, prodinfE);
 /////////////////////////////////////////////////////////////////////////////
-	input 		[63:0]		ReadData1E;		// input 1
+	input logic 		[63:0]		ReadData1E;		// input 1
-	input		[63:0]		ReadData2E;     // input 2 
+	input logic		[63:0]		ReadData2E;     // input 2 
-	input 		[63:0]		ReadData3E;     // input 3
+	input logic 		[63:0]		ReadData3E;     // input 3
-	input 		[2:0]	 	FrmE;          	// Rounding mode
+	input logic 		[2:0]	 	FrmE;          	// Rounding mode
-	output 		[12:0]		aligncntE;    	// status flags
+	output logic 		[12:0]		aligncntE;    	// status flags
-	output 		[105:0]		rE; 				// one result of partial product sum
+	output logic 		[105:0]		rE; 				// one result of partial product sum
-	output 		[105:0]		sE; 				// other result of partial products
+	output logic 		[105:0]		sE; 				// other result of partial products
-	output 		[163:0]		tE;				// output of alignment shifter	
+	output logic 		[163:0]		tE;				// output logic of alignment shifter	
-	output 		[12:0]		aeE; 		// multiplier expoent
+	output logic 		[12:0]		aeE; 		// multiplier expoent
-	output 					bsE;				// sticky bit of addend
+	output logic 					bsE;				// sticky bit of addend
-	output 					killprodE; 		// ReadData3E >> product
+	output logic 					killprodE; 		// ReadData3E >> product
-	output					xzeroE;
+	output logic					xzeroE;
-	output					yzeroE;
+	output logic					yzeroE;
-	output					zzeroE;
+	output logic					zzeroE;
-	output					xdenormE;
+	output logic					xdenormE;
-	output					ydenormE;
+	output logic					ydenormE;
-	output					zdenormE;
+	output logic					zdenormE;
-	output					xinfE;
+	output logic					xinfE;
-	output					yinfE;
+	output logic					yinfE;
-	output					zinfE;
+	output logic					zinfE;
-	output					xnanE;
+	output logic					xnanE;
-	output					ynanE;
+	output logic					ynanE;
-	output					znanE;
+	output logic					znanE;
-	output					nanE;
+	output logic					nanE;
-	output					prodinfE;
+	output logic					prodinfE;
-	output			[8:0]		sumshiftE;
+	output logic			[8:0]		sumshiftE;
-	output					sumshiftzeroE;
+	output logic					sumshiftzeroE;
 // Internal nodes
-//	output 		[12:0]		aligncntE; 		// shift count for alignment
+//	output logic 		[12:0]		aligncntE; 		// shift count for alignment
 	logic 					prodof; 		// ReadData1E*ReadData2E out of range
@ -95,7 +95,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
 	special			special(.*);
-// Instantiate control output
+// Instantiate control output logic
 flag1				flag1(.*); 
--- a/wally-pipelined/src/fpu/FMA/fma2.sv
+++ b/wally-pipelined/src/fpu/FMA/fma2.sv
@ -15,13 +15,13 @@
 //    normalize Normalization shifter
 //    round     Rounding of result
 //    exception Handles exceptional cases
-//    bypass    Handles bypass of result to ReadData1M or ReadData3M inputs
+//    bypass    Handles bypass of result to ReadData1M or ReadData3M inputs
 //    sign      One bit sign handling block 
-//    special   Catch special cases (inputs = 0  / infinity /  etc.) 
+//    special   Catch special cases (inputs = 0  / infinity /  etc.) 
 //
 //   The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
 //   RN, RZ, RM, or RP.  The result is optionally bypassed back to
-//   the ReadData1M or ReadData3M inputs for use on the next cycle.  In addition,  four signals
+//   the ReadData1M or ReadData3M inputs for use on the next cycle.  In addition,  four signals
 //   are produced: trap, overflow, underflow, and inexact.  Trap indicates
 //   an infinity, NaN, or denormalized number to be handled in software;
 //   the other three signals are IMMM flags.
@ -39,38 +39,38 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
 );
 /////////////////////////////////////////////////////////////////////////////
-	input 		[63:0]		ReadData1M;		// input 1
+	input logic 		[63:0]		ReadData1M;		// input logic 1
-	input		[63:0]		ReadData2M;     // input 2 
+	input logic		[63:0]		ReadData2M;     // input logic 2 
-	input 		[63:0]		ReadData3M;     // input 3
+	input logic 		[63:0]		ReadData3M;     // input logic 3
-	input 		[2:0]	 	FrmM;          	// Rounding mode
+	input logic 		[2:0]	 	FrmM;          	// Rounding mode
-	input 		[12:0]		aligncntM;    	// status flags
+	input logic 		[12:0]		aligncntM;    	// status flags
-	input 		[105:0]		rM; 				// one result of partial product sum
+	input logic 		[105:0]		rM; 				// one result of partial product sum
-	input 		[105:0]		sM; 				// other result of partial products
+	input logic 		[105:0]		sM; 				// other result of partial products
-	input 		[163:0]		tM;				// output of alignment shifter	
+	input logic 		[163:0]		tM;				// output of alignment shifter	
-	input 		[8:0]		normcntM; 		// shift count for normalizer
+	input logic 		[8:0]		normcntM; 		// shift count for normalizer
-	input 		[12:0]		aeM; 		// multiplier expoent
+	input logic 		[12:0]		aeM; 		// multiplier expoent
-	input 					bsM;				// sticky bit of addend
+	input logic 					bsM;				// sticky bit of addend
-	input 					killprodM; 		// ReadData3M >> product
+	input logic 					killprodM; 		// ReadData3M >> product
-	input					prodinfM;
+	input logic					prodinfM;
-	input					xzeroM;
+	input logic					xzeroM;
-	input					yzeroM;
+	input logic					yzeroM;
-	input					zzeroM;
+	input logic					zzeroM;
-	input					xdenormM;
+	input logic					xdenormM;
-	input					ydenormM;
+	input logic					ydenormM;
-	input					zdenormM;
+	input logic					zdenormM;
-	input					xinfM;
+	input logic					xinfM;
-	input					yinfM;
+	input logic					yinfM;
-	input					zinfM;
+	input logic					zinfM;
-	input					xnanM;
+	input logic					xnanM;
-	input					ynanM;
+	input logic					ynanM;
-	input					znanM;
+	input logic					znanM;
-	input					nanM;
+	input logic					nanM;
-	input			[8:0]		sumshiftM;
+	input logic			[8:0]		sumshiftM;
-	input					sumshiftzeroM;
+	input logic					sumshiftzeroM;
-	input 		[63:0]		FmaResultM;     // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
+	output logic 		[63:0]		FmaResultM;     // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
-	output 		[4:0]		FmaFlagsM;    	// status flags
+	output logic 		[4:0]		FmaFlagsM;    	// status flags
 // Internal nodes
--- a/wally-pipelined/src/fpu/FMA/lza.sv
+++ b/wally-pipelined/src/fpu/FMA/lza.sv
@ -12,14 +12,13 @@
 module lza(sum, normcnt, sumzero); 
 /////////////////////////////////////////////////////////////////////////////
-	input     	[163:0]  	sum;            // sum
+	input logic     	[163:0]  	sum;            // sum
-	output     	[8:0]		normcnt;		// normalization shift count
+	output logic     	[8:0]		normcnt;		// normalization shift count
-	output     		  		sumzero;		// sum = 0
+	output logic     		  		sumzero;		// sum = 0
 	// Internal nodes
 	reg			[8:0] 		i;				// loop index
 	reg			[8:0] 		normcnt;		// normalization shift count
 	// A real LOP uses a fast carry chain to find only the first 0.
 	// It is an example of a parallel prefix algorithm.  For the sake
@ -27,7 +26,7 @@ module lza(sum, normcnt, sumzero);
 	// A real LOP would also operate on the sources of the adder, not
 	// the result!
-	always @ ( sum)
+	always_comb
 		begin
 			i =   0;
 			while (~sum[163-i] && i <= 163) i = i+1;  // search for leading one 
--- a/wally-pipelined/src/fpu/FMA/multiply.sv
+++ b/wally-pipelined/src/fpu/FMA/multiply.sv
@ -2,31 +2,32 @@
 module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE); 
 /////////////////////////////////////////////////////////////////////////////
-	input 		[51:0]		xman;				// Fraction of multiplicand	x
+	input logic 		[51:0]		xman;				// Fraction of multiplicand	x
-	input		[51:0]		yman;				// Fraction of multiplicand y	
+	input logic		[51:0]		yman;				// Fraction of multiplicand y	
-	input					xdenormE;		// is x denormalized	
+	input logic					xdenormE;		// is x denormalized	
-	input					ydenormE;		// is y denormalized	
+	input logic					ydenormE;		// is y denormalized	
-	input     			xzeroE;		// Z is denorm
+	input logic     			xzeroE;		// Z is denorm
-	input     			yzeroE;		// Z is denorm
+	input logic     			yzeroE;		// Z is denorm
-	output		[105:0]		rE;				//	partial product 1	
+	output logic		[105:0]		rE;				//	partial product 1	
-	output		[105:0]		sE;				//	partial product 2	
+	output logic		[105:0]		sE;				//	partial product 2	
     wire        [54:0]      yExt; //y with appended 0 and assumed 1
     wire        [53:0]      xExt; //y with assumed 1
     wire [26:0][1:0] add1;
     wire [26:0][54:0] pp; 
     wire [26:0] e;
-     logic [17:0][105:0] lv1add;
+     logic [106:0] tmpsE;
-     logic [11:0][105:0] lv2add;
+     logic [17:0][106:0] lv1add;
-     logic [7:0][105:0] lv3add;
+     logic [11:0][106:0] lv2add;
-     logic [3:0][105:0] lv4add;
+     logic [7:0][106:0] lv3add;
-     logic [21:0][106:0] carryTmp;
+     logic [3:0][106:0] lv4add;
-     wire [26:0][105:0] acc; 
+     logic [21:0][107:0] carryTmp;
     wire [26:0][106:0] acc; 
     // wire [105:0] acc
    genvar i;	
-	assign xExt = {2'b0,~(xdenormE|xzeroE),xman};
+	assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
-	assign yExt = {2'b0,~(ydenormE|yzeroE),yman, 1'b0};
+	assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
     generate
        for(i=0; i<27; i=i+1) begin
@ -35,69 +36,70 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
     endgenerate
    assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; 
-    assign acc[1] = {50'b01,~e[1],pp[1],add1[0]}; 
+    assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; 
-    assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
+    assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
-    assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
+    assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
-    assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
+    assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
-    assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
+    assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
-    assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
+    assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
-    assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
+    assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
-    assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
+    assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
-    assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
+    assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
-    assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
+    assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
-    assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
+    assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
-    assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
+    assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
-    assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
+    assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
-    assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
+    assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
-    assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
+    assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
-    assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
+    assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
-    assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
+    assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
-    assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
+    assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
-    assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
+    assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
-    assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
+    assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
-    assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
+    assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
-    assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
+    assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
-    assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
+    assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
-    assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
+    assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
-    assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
+    assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
    assign acc[26] = {pp[26],add1[25], 50'b0};
    //*** resize adders
     generate
        for(i=0; i<9; i=i+1) begin
-            add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
+            add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
-                                           .carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
+                                           .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
-            assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
+            assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
        end
     endgenerate
     generate
        for(i=0; i<6; i=i+1) begin
-            add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
+            add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
-                                           .carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
+                                           .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
-            assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
+            assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
        end
     endgenerate
    generate
        for(i=0; i<4; i=i+1) begin
-            add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
+            add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
-                                            .carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
+                                            .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
-            assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
+            assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
        end
    endgenerate
    generate
        for(i=0; i<2; i=i+1) begin
-            add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
+            add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
                                            .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
-            assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
+            assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
        end
    endgenerate
-    add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
+    add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
-                                    .carry(carryTmp[21]), .sum(sE));
+                                    .carry(carryTmp[21]), .sum(tmpsE));
    assign sE = tmpsE[105:0];
    assign rE = {carryTmp[21][104:0], 1'b0};
 		// assign rE = 0;
 		// assign sE = acc[0] +
@ -131,3 +133,4 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
 			// assign sE = {53'b0,~(xdenormE|xzeroE),xman}  *  {53'b0,~(ydenormE|yzeroE),yman};
 			// assign rE = 0;
 endmodule
--- a/wally-pipelined/src/fpu/FMA/normalize.sv
+++ b/wally-pipelined/src/fpu/FMA/normalize.sv
@ -17,35 +17,31 @@
 module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero, 
 				xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v); 
 /////////////////////////////////////////////////////////////////////////////
-	input     	[163:0]  	sum;            // sum
+	input logic     	[163:0]  	sum;            // sum
-	input     	[62:52]  	zexp;            // sum
+	input logic     	[62:52]  	zexp;            // sum
-	input		[8:0] 		normcnt;     	// normalization shift count
+	input logic		[8:0] 		normcnt;     	// normalization shift count
-	input		[12:0] 		aeM;     	// normalization shift count
+	input logic		[12:0] 		aeM;     	// normalization shift count
-	input		[12:0] 		aligncntM;     	// normalization shift count
+	input logic		[12:0] 		aligncntM;     	// normalization shift count
-	input		[8:0] 		sumshiftM;     	// normalization shift count
+	input logic		[8:0] 		sumshiftM;     	// normalization shift count
-	input				sumshiftzeroM;
+	input logic				sumshiftzeroM;
-	input				sumzero;	// sum is zero
+	input logic				sumzero;	// sum is zero
-	input				bsM;		// sticky bit for addend
+	input logic				bsM;		// sticky bit for addend
-	input                  		xdenormM;        // Input Z is denormalized
+	input logic                  		xdenormM;        // Input Z is denormalized
-	input                  		ydenormM;        // Input Z is denormalized
+	input logic                  		ydenormM;        // Input Z is denormalized
-	input                  		zdenormM;        // Input Z is denormalized
+	input logic                  		zdenormM;        // Input Z is denormalized
-	input				xzeroM;
+	input logic				xzeroM;
-	input				yzeroM;
+	input logic				yzeroM;
-	input				zzeroM;
+	input logic				zzeroM;
-	output				sticky;		//sticky bit
+	output logic				sticky;		//sticky bit
-	output		[12:0]		de0;
+	output logic		[12:0]		de0;
-	output                  	resultdenorm;        // Input Z is denormalized
+	output logic                  	resultdenorm;        // Input Z is denormalized
-	output		[53:0]		v;		// normalized sum, R, S bits
+	output logic		[53:0]		v;		// normalized sum, R, S bits
 	// Internal nodes
-	reg       	[53:0]     	v;           	// normalized sum, R, S bits 
+logic       	[163:0]  	sumshifted;     // shifted sum
 	logic                  	resultdenorm;        // Input Z is denormalized
 	logic 		[12:0]	de0;
 	logic       	[163:0]  	sumshifted;     // shifted sum
 	logic		[9:0]		sumshifttmp;
 	logic       	[163:0]  	sumshiftedtmp;     // shifted sum
 	logic 				sticky;
 	logic				isShiftLeft1;
 logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
@ -60,28 +56,28 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 	// The sticky bit calculation is actually built into the shifter and
 	// does not require a true subtraction shown in the model.
-	assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
+	assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
-	assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
+	// assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
-	always @(sum or sumshiftM or aeM or aligncntM or normcnt or bsM or isShiftLeft1 or zexp or zdenormM)
+	always_comb
 		begin
 		// d = aligncntM
 		// l = normcnt
 		// p = 53
 		// ea + eb = aeM
 			// set d<=2 to d<=0
-			if ($signed(aligncntM)<=$signed(2))  begin //d<=2 
+			if ($signed(aligncntM)<=$signed(13'd2))  begin //d<=2 
 				// product anchored or cancellation
-				if ($signed(aeM-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
+				if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
 					//normal result
-					de0 = xzeroM|yzeroM ? zexp : aeM-normcnt+xdenormM+ydenormM+57;
+					de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
 					resultdenorm = |sum & ~|de0 | de0[12];
 					// if z is zero then there was a 56 bit shift of the product
-					sumshifted = resultdenorm ? sum << sumshiftM-zzeroM+isShiftLeft1 : sum << normcnt; // p+2+l
+					sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
 					v = sumshifted[162:109];
 					sticky = (|sumshifted[108:0]) | bsM;
 					//de0 = aeM-normcnt+2-1023;
 				end else begin
-					sumshifted = sum << (1080+aeM);
+					sumshifted = sum << (13'd1080+aeM);
 					v = sumshifted[162:109];
 					sticky = (|sumshifted[108:0]) | bsM;
 					resultdenorm = 1;
@ -100,29 +96,29 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 				// the book says exp = zexp + {-1,0,1}
 				if(sumshiftzeroM) begin
 					v = sum[162:109];
-					sticky = sum[108:0] | bsM;
+					sticky = (|sum[108:0]) | bsM;
-					de0 = zexp;
+					de0 = {2'b0,zexp};
 				end else if(sumshifted[163] & ~sumshifttmp[9])begin
 					v = sumshifted[162:109];
 					sticky = (|sumshifted[108:0]) | bsM;
-					de0 = zexp +2;
+					de0 = {2'b0,zexp} +13'd2;
 				end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
 					v = sumshifted[161:108];
 					sticky = (|sumshifted[107:0]) | bsM;
-					de0 = zexp+1;
+					de0 = {2'b0,zexp}+13'd1;
 				end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
 					v = sumshifted[160:107];
 					sticky = (|sumshifted[106:0]) | bsM;
 					//de0 = zexp-1;
-					de0 = zexp+zdenormM;
+					de0 = {2'b0,zexp}+{12'b0,zdenormM};
 				end else if(sumshifted[160]& ~zdenormM) begin
-					de0 = zexp-1;
+					de0 = {2'b0,zexp}-13'b1;
 					v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
 					sticky = (|sumshifted[105:0]) | bsM;
 					//de0 = zexp-1;
 				end else if(sumshifted[159]& ~zdenormM) begin
 					//v = sumshifted[158:105];
-					de0 = zexp-2;
+					de0 = {2'b0,zexp}-13'd2;
 					v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
 					sticky = (|sumshifted[104:0]) | bsM;
 					//de0 = zexp-1;
@ -130,7 +126,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 					v = sumshifted[160:107];
 					sticky = (|sumshifted[106:0]) | bsM;
 					//de0 = zexp-1;
-					de0 = zexp;
+					de0 = {{2{zexp[62]}},zexp};
 				end else begin
 					de0 = 0;
 					sumshifted = sum << sumshiftM-1; // p+2+l
@ -148,3 +144,4 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 endmodule
--- a/wally-pipelined/src/fpu/FMA/round.sv
+++ b/wally-pipelined/src/fpu/FMA/round.sv
@ -4,7 +4,7 @@
 // Date:		11/2/1995
 //
 // Block Description: 
-//   This block is responsible for rounding the normalized result of //   the FMAC.   Because prenormalized results may be bypassed back to //   the FMAC X and z inputs, rounding does not appear in the critical //   path of most floating point code.   This is good because rounding //   requires an entire 52 bit carry-propagate half-adder delay.
+//   This block is responsible for rounding the normalized result of //   the FMAC.   Because prenormalized results may be bypassed back to //   the FMAC X and z input logics, rounding does not appear in the critical //   path of most floating point code.   This is good because rounding //   requires an entire 52 bit carry-propagate half-adder delay.
 //
 //   The results from other FPU blocks (e.g. FCVT,  FDIV,  etc)  are also 
 //   muxed in to form the actual result for register file writeback.  This
@ -19,23 +19,23 @@ module round(v, sticky, FrmM, wsign,
 			  wman, infinity, specialsel,expplus1);
 /////////////////////////////////////////////////////////////////////////////
-	input		[53:0]		v;		// normalized sum, R, S bits
+	input logic		[53:0]		v;		// normalized sum, R, S bits
-	input				sticky;		//sticky bit
+	input logic				sticky;		//sticky bit
-	input		[2:0]	FrmM;
+	input logic		[2:0]	FrmM;
-	input				wsign;		// Sign of result
+	input logic				wsign;		// Sign of result
-	input 		[4:0]	FmaFlagsM;
+	input logic 		[4:0]	FmaFlagsM;
-	input				inf;		// Some input is infinity
+	input logic				inf;		// Some input is infinity
-	input				nanM;		// Some input is NaN
+	input logic				nanM;		// Some input is NaN
-	input				xnanM;		// X is NaN
+	input logic				xnanM;		// X is NaN
-	input				ynanM;		// Y is NaN
+	input logic				ynanM;		// Y is NaN
-	input				znanM;		// Z is NaN
+	input logic				znanM;		// Z is NaN
-	input		[51:0]		xman;		// Input X
+	input logic		[51:0]		xman;		// Input X
-	input		[51:0]		yman;		// Input Y
+	input logic		[51:0]		yman;		// Input Y
-	input		[51:0]		zman;		// Input Z
+	input logic		[51:0]		zman;		// Input Z
-	output		[51:0]		wman; 		// rounded result of FMAC
+	output logic		[51:0]		wman; 		// rounded result of FMAC
-	output				infinity;    	// Generate infinity on overflow
+	output logic				infinity;    	// Generate infinity on overflow
-	output				specialsel;  	// Select special result
+	output logic				specialsel;  	// Select special result
-	output				expplus1;
+	output logic				expplus1;
 	// Internal nodes
@ -56,7 +56,7 @@ module round(v, sticky, FrmM, wsign,
 	//	0xx - do nothing
 	//	100 - tie - plus1 if v[2] = 1
 	//	101/110/111 - plus1
-	always @ (FrmM, v, wsign, sticky) begin
+	always_comb begin
 		case (FrmM)
 			3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
 			3'b001: plus1 = 0;//round to zero
@ -85,7 +85,7 @@ module round(v, sticky, FrmM, wsign,
 	// The special result mux is a 4:1 mux that should not appear in the
 	// critical path of the machine.   It is not priority encoded,  despite
 	// the code below suggesting otherwise.  Also,  several of the identical data
-	// inputs to the wide muxes can be combined at the expense of more
+	// input logics to the wide muxes can be combined at the expense of more
 	// complicated non-critical control in the circuit implementation.
 	assign specialsel =  FmaFlagsM[2] ||  FmaFlagsM[1] ||  FmaFlagsM[4] || //overflow underflow invalid
@ -102,15 +102,15 @@ module round(v, sticky, FrmM, wsign,
 	assign infinityres = infinity ? 52'b0 : {52{1'b1}};
 	// Invalid operations produce a quiet NaN. The result should
-	// propagate an input if the input is NaN. Since we assume all
+	// propagate an input logic if the input logic is NaN. Since we assume all
-	// NaN inputs are already quiet, we don't have to force them quiet.
+	// NaN input logics are already quiet, we don't have to force them quiet.
 	// assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original
 	// IEEE 754-2008 section 6.2.3 states:
-	// "If two or more inputs are NaN, then the payload of the resulting NaN should be 
+	// "If two or more input logics are NaN, then the payload of the resulting NaN should be 
-	// identical to the payload of one of the input NaNs if representable in the destination
+	// identical to the payload of one of the input logic NaNs if representable in the destination
-	// format. This standard does not specify which of the input NaNs will provide the payload."
+	// format. This standard does not specify which of the input logic NaNs will provide the payload."
 	assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
 	// Select result with 4:1 mux
--- a/wally-pipelined/src/fpu/FMA/sign.sv
+++ b/wally-pipelined/src/fpu/FMA/sign.sv
@ -14,30 +14,28 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
 			 sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
 ////////////////////////////////////////////////////////////////////////////I
-	input					xsign;			// Sign of X 
+	input logic					xsign;			// Sign of X 
-	input					ysign;			// Sign of Y 
+	input logic					ysign;			// Sign of Y 
-	input					zsign;			// Sign of Z
+	input logic					zsign;			// Sign of Z
-	input					isAdd;
+	input logic					isAdd;
-	input					negsum0;		// Sum in +O mode is negative 
+	input logic					negsum0;		// Sum in +O mode is negative 
-	input					negsum1;		// Sum in +1 mode is negative 
+	input logic					negsum1;		// Sum in +1 mode is negative 
-	input					bsM;				// sticky bit from addend
+	input logic					bsM;				// sticky bit from addend
-	input		[2:0]		FrmM;				// Round toward minus infinity
+	input logic		[2:0]		FrmM;				// Round toward minus infinity
-	input		[4:0]		FmaFlagsM;				// Round toward minus infinity
+	input logic		[4:0]		FmaFlagsM;				// Round toward minus infinity
-	input					sumzero;		// Sum = O
+	input logic					sumzero;		// Sum = O
-	input					zinfM;			// Y = Inf
+	input logic					zinfM;			// Y = Inf
-	input					inf;			// Some input = Inf
+	input logic					inf;			// Some input = Inf
-	output					wsign;			// Sign of W 
+	output logic					wsign;			// Sign of W 
-	output					invz;			// Invert addend into adder
+	output logic					invz;			// Invert addend into adder
-	output					negsum;			// Negate result of adder
+	output logic					negsum;			// Negate result of adder
-	output					selsum1;		// Select +1 mode from compound adder
+	output logic					selsum1;		// Select +1 mode from compound adder
 	// Internal nodes
 	wire					zerosign;    	// sign if result= 0 
 	wire					sumneg;    	// sign if result= 0 
 	wire					infsign;     	// sign if result= Inf 
 	reg						negsum;         // negate result of adder 
 	reg						selsum1;     	// select +1 mode from compound adder 
 logic tmp;
 	// Compute sign of product 
@ -59,7 +57,7 @@ logic tmp;
 	assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);
 	//always @(invz or negsum0 or negsum1 or bsM or ps)
 	//	begin
-	//		if (~invz) begin               // both inputs have same sign  
+	//		if (~invz) begin               // both input logics have same sign  
 	//			negsum = 0;
 	//			selsum1 = 0;
 	//		end else if (bsM) begin        // sticky bit set on addend
@ -84,7 +82,7 @@ logic tmp;
 	// Sign calculation is not in the critical path so the cases
 	// can be tolerated. 
 	// IEEE 754-2008 section 6.3 states 
-	// 		"When ether an input or result is NaN, this standard does not interpret the sign of a NaN."
+	// 		"When either an input or result is NaN, this standard does not interpret the sign of a NaN."
 	// 		also pertaining to negZero it states:
 	//			"When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
 	//			 shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero 
--- a/wally-pipelined/src/fpu/FMA/special.sv
+++ b/wally-pipelined/src/fpu/FMA/special.sv
@ -14,21 +14,21 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
 				xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
 /////////////////////////////////////////////////////////////////////////////
-	input   	[63:0]     	ReadData1E;              // Input ReadData1E
+	input logic   	[63:0]     	ReadData1E;              // Input ReadData1E
-	input     	[63:0]     	ReadData2E;           	// Input ReadData2E
+	input logic     	[63:0]     	ReadData2E;           	// Input ReadData2E
-	input      	[63:0]    	ReadData3E;            	// Input ReadData3E 
+	input logic      	[63:0]    	ReadData3E;            	// Input ReadData3E 
-	output				xzeroE;		// Input ReadData1E = 0
+	output logic				xzeroE;		// Input ReadData1E = 0
-	output				yzeroE;		// Input ReadData2E = 0
+	output logic				yzeroE;		// Input ReadData2E = 0
-	output				zzeroE;		// Input ReadData3E = 0
+	output logic				zzeroE;		// Input ReadData3E = 0
-	output				xnanE;		// ReadData1E is NaN
+	output logic				xnanE;		// ReadData1E is NaN
-	output				ynanE;		// ReadData2E is NaN
+	output logic				ynanE;		// ReadData2E is NaN
-	output				znanE;		// ReadData3E is NaN
+	output logic				znanE;		// ReadData3E is NaN
-	output				xdenormE;	// ReadData1E is denormalized
+	output logic				xdenormE;	// ReadData1E is denormalized
-	output				ydenormE;	// ReadData2E is denormalized
+	output logic				ydenormE;	// ReadData2E is denormalized
-	output				zdenormE;	// ReadData3E is denormalized
+	output logic				zdenormE;	// ReadData3E is denormalized
-	output				xinfE;		// ReadData1E is infinity
+	output logic				xinfE;		// ReadData1E is infinity
-	output				yinfE;		// ReadData2E is infinity
+	output logic				yinfE;		// ReadData2E is infinity
-	output				zinfE;		// ReadData3E is infinity
+	output logic				zinfE;		// ReadData3E is infinity
 	// In the actual circuit design, the gates looking at bits
 	// 51:0 and at bits 62:52 should be shared among the various detectors.
@ -60,7 +60,7 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
 	// assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE;
 	// assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE;
 	// assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE;
-	// KATHERINE - removed denorm to prevent outputing zero when computing with a denormalized number
+	// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
 	assign xzeroE = ~(|ReadData1E[62:0]);
 	assign yzeroE = ~(|ReadData2E[62:0]);
 	assign zzeroE = ~(|ReadData3E[62:0]);
--- a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
@ -1 +1 @@
-testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat
+testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat
--- a/wally-pipelined/src/fpu/add.sv
+++ b/wally-pipelined/src/fpu/add.sv
@ -34,6 +34,7 @@ module add(rM, sM, tM, sum,
 	wire		[164:0] 	sum0;			// sum of compound adder +0 mode
 	wire		[164:0] 	sum1;			// sum of compound adder +1 mode
 	wire		[163:0] 	prodshifted;			// sum of compound adder +1 mode
 	wire		[164:0] 	tmp;			// sum of compound adder +1 mode
 	// Invert addend if z's sign is different from the product's sign
@ -44,12 +45,13 @@ module add(rM, sM, tM, sum,
 	assign r2 = killprodM ? 106'b0 : rM;
 	assign s2 = killprodM ? 106'b0 : sM;
-	//replace this with a more structural cpa that synthisises better
+	//***replace this with a more structural cpa that synthesizes better
 	// Compound adder
 	// Consists of 3:2 CSA followed by long compound CPA
-	// assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
+	//assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
-	assign sum0 = {1'b0,prodshifted} + t2 + 158'b0 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0};
+	//assign tmp = ({{57{r2[105]}},r2, 2'b0} + {{57{s2[105]}},s2, 2'b0});
-	assign sum1 = {1'b0,prodshifted} + t2 + 158'b1 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0}; // +1 from invert of z above
+	assign sum0 = t2 + 164'b0 + {57'b0, r2+s2, 2'b0};
 	assign sum1 = t2 + 164'b1 + {57'b0, r2+s2, 2'b0}; // +1 from invert of z above
 	// Check sign bits in +0/1 modes 
 	assign negsum0 = sum0[164];
@ -60,3 +62,4 @@ module add(rM, sM, tM, sum,
 	assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]);
 endmodule
--- a/wally-pipelined/src/fpu/adder.sv
+++ b/wally-pipelined/src/fpu/adder.sv
@ -88,15 +88,15 @@ module BLOCK2A ( PIN2, GIN1, GIN2, GOUT );
   assign GOUT =  ~ (GIN2 | (PIN2 & GIN1));
 endmodule
-
+//***KEP all 0:63, 0:64, etc. ranges changed to 63:0, 64:0, etc. due to lint warning
 module PRESTAGE_64 ( A, B, CIN, POUT, GOUT );
-   input  [0:63] A;
+   input  [63:0] A;
-   input [0:63]  B;
+   input [63:0]  B;
   input 	 CIN;
-   output [0:63] POUT;
+   output [63:0] POUT;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   BLOCK0 U10 (A[0] , B[0] , POUT[0] , GOUT[1] );
   BLOCK0 U11 (A[1] , B[1] , POUT[1] , GOUT[2] );
@ -169,11 +169,11 @@ endmodule // PRESTAGE_64
 module DBLC_0_64 ( PIN, GIN, POUT, GOUT );
-   input  [0:63] PIN;
+   input  [63:0] PIN;
-   input [0:64]  GIN;
+   input [64:0]  GIN;
-   output [0:62] POUT;
+   output [62:0] POUT;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   INVBLOCK U10 (GIN[0] , GOUT[0] );
   BLOCK1A U21 (PIN[0] , GIN[0] , GIN[1] , GOUT[1] );
@ -246,11 +246,11 @@ endmodule // DBLC_0_64
 module DBLC_1_64 ( PIN, GIN, POUT, GOUT );
-   input  [0:62] PIN;
+   input  [62:0] PIN;
-   input [0:64]  GIN;
+   input [64:0]  GIN;
-   output [0:60] POUT;
+   output [60:0] POUT;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   INVBLOCK U10 (GIN[0] , GOUT[0] );
   INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -323,11 +323,11 @@ endmodule // DBLC_1_64
 module DBLC_2_64 ( PIN, GIN, POUT, GOUT );
-   input  [0:60] PIN;
+   input  [60:0] PIN;
-   input [0:64]  GIN;
+   input [64:0]  GIN;
-   output [0:56] POUT;
+   output [56:0] POUT;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   INVBLOCK U10 (GIN[0] , GOUT[0] );
   INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -400,11 +400,11 @@ endmodule // DBLC_2_64
 module DBLC_3_64 ( PIN, GIN, POUT, GOUT );
-   input  [0:56] PIN;
+   input  [56:0] PIN;
-   input [0:64]  GIN;
+   input [64:0]  GIN;
-   output [0:48] POUT;
+   output [48:0] POUT;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   INVBLOCK U10 (GIN[0] , GOUT[0] );
   INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -477,11 +477,11 @@ endmodule // DBLC_3_64
 module DBLC_4_64 ( PIN, GIN, POUT, GOUT );
-   input  [0:48] PIN;
+   input  [48:0] PIN;
-   input [0:64]  GIN;
+   input [64:0]  GIN;
-   output [0:32] POUT;
+   output [32:0] POUT;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   INVBLOCK U10 (GIN[0] , GOUT[0] );
   INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -554,11 +554,11 @@ endmodule // DBLC_4_64
 module DBLC_5_64 ( PIN, GIN, POUT, GOUT );
-   input  [0:32] PIN;
+   input  [32:0] PIN;
-   input [0:64]  GIN;
+   input [64:0]  GIN;
   output [0:0]  POUT;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   INVBLOCK U10 (GIN[0] , GOUT[0] );
   INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -631,12 +631,12 @@ endmodule // DBLC_5_64
 module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT );
-   input  [0:63] A;
+   input  [63:0] A;
-   input [0:63]  B;
+   input [63:0]  B;
   input 	 PBIT;
-   input [0:64]  CARRY;
+   input [64:0]  CARRY;
-   output [0:63] SUM;
+   output [63:0] SUM;
   output 	 COUT;
   XXOR1 U20 (A[0] , B[0] , CARRY[0] , SUM[0] );
@ -710,22 +710,22 @@ endmodule // XORSTAGE_64
 module DBLCTREE_64 ( PIN, GIN, GOUT, POUT );
-   input  [0:63] PIN;
+   input  [63:0] PIN;
-   input [0:64]  GIN;
+   input [64:0]  GIN;
-   output [0:64] GOUT;
+   output [64:0] GOUT;
   output [0:0]  POUT;
-   wire [0:62] 	 INTPROP_0;
+   wire [62:0] 	 INTPROP_0;
-   wire [0:64] 	 INTGEN_0;
+   wire [64:0] 	 INTGEN_0;
-   wire [0:60] 	 INTPROP_1;
+   wire [60:0] 	 INTPROP_1;
-   wire [0:64] 	 INTGEN_1;
+   wire [64:0] 	 INTGEN_1;
-   wire [0:56] 	 INTPROP_2;
+   wire [56:0] 	 INTPROP_2;
-   wire [0:64] 	 INTGEN_2;
+   wire [64:0] 	 INTGEN_2;
-   wire [0:48] 	 INTPROP_3;
+   wire [48:0] 	 INTPROP_3;
-   wire [0:64] 	 INTGEN_3;
+   wire [64:0] 	 INTGEN_3;
-   wire [0:32] 	 INTPROP_4;
+   wire [32:0] 	 INTPROP_4;
-   wire [0:64] 	 INTGEN_4;
+   wire [64:0] 	 INTGEN_4;
   DBLC_0_64 U_0 (.PIN(PIN) , .GIN(GIN) , .POUT(INTPROP_0) , .GOUT(INTGEN_0) );
   DBLC_1_64 U_1 (.PIN(INTPROP_0) , .GIN(INTGEN_0) , .POUT(INTPROP_1) , .GOUT(INTGEN_1) );
@ -739,20 +739,20 @@ endmodule // DBLCTREE_64
 module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT );
-   input  [0:63] OPA;
+   input  [63:0] OPA;
-   input [0:63]  OPB;
+   input [63:0]  OPB;
   input 	 CIN;
-   output [0:63] SUM;
+   output [63:0] SUM;
   output 	 COUT;
-   wire [0:63] 	 INTPROP;
+   wire [63:0] 	 INTPROP;
-   wire [0:64] 	 INTGEN;
+   wire [64:0] 	 INTGEN;
   wire [0:0] 	 PBIT;
-   wire [0:64] 	 CARRY;
+   wire [64:0] 	 CARRY;
   PRESTAGE_64 U1 (OPA , OPB , CIN , INTPROP , INTGEN );
   DBLCTREE_64 U2 (INTPROP , INTGEN , CARRY , PBIT );
-   XORSTAGE_64 U3 (OPA[0:63] , OPB[0:63] , PBIT[0] , CARRY[0:64] , SUM , COUT );
+   XORSTAGE_64 U3 (OPA[63:0] , OPB[63:0] , PBIT[0] , CARRY[64:0] , SUM , COUT );
 endmodule 
--- a/wally-pipelined/src/fpu/align.sv
+++ b/wally-pipelined/src/fpu/align.sv
@ -30,21 +30,10 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
 	// Internal nodes
 	reg       	[215:0]   	shift;				// aligned addend from shifter
-	logic         		zexpsel;				// sticky bit of product
+	logic 		[12:0]		tmp;
 	reg       	[7:0]		i;				// temp storage for finding sticky bit
 	wire		[52:0]		z1;				// Z plus 1
 	wire		[51:0]		z2;				// Z selected after handling rounds
 	// Compute sign of aligncntE + 104 to check for shifting too far right 
 	//assign align104 = aligncntE+104;
 	// Shift addend by alignment count.  Generate sticky bits from
 	// addend on right shifts.  Handle special cases of shifting
 	// by too much.
 //***change always @ to always_combs
 	always_comb 
 		begin
@ -55,32 +44,34 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
 		killprodE = xzeroE | yzeroE;
 		// d = aligncntE
 		// p = 53
-		//***try reducing this hardware try getting onw shifter
+		//***try reducing this hardware to use one shifter
-		if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1
+		if ($signed(aligncntE) <= $signed(-(13'd105))) begin //d<=-2p+1
 			//product anchored case with saturated shift
 			sumshiftE = 163;	// 3p+4	
 			sumshiftzeroE = 0;
 			shift = {1'b1,zman,163'b0} >> sumshiftE;
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
-			//zexpsel = 0;
+
-		end else if($signed(aligncntE) <= $signed(2))  begin // -2p+1<d<=2
+		end else if($signed(aligncntE) <= $signed(13'd2))  begin // -2p+1<d<=2
 			// product anchored or cancellation
-			sumshiftE = 57-aligncntE; // p + 2 - d  
+			tmp = 13'd57-aligncntE;
 			sumshiftE = tmp[8:0]; // p + 2 - d  
 			sumshiftzeroE = 0;
 			shift = {~zdenormE,zman,163'b0} >> sumshiftE;
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
-			//zexpsel = 0;
+
-		end else if ($signed(aligncntE)<=$signed(55))  begin // 2 < d <= p+2
+		end else if ($signed(aligncntE)<=$signed(13'd55))  begin // 2 < d <= p+2
 			// addend anchored case
-			// used to be 56 \/ somthing doesn'tE seem right too many typos
+			// used to be 56 \/ something doesn't seem right too many typos
-			sumshiftE = 57-aligncntE;
+			tmp = 13'd57-aligncntE;
 			sumshiftE = tmp[8:0]; 
 			sumshiftzeroE = 0;
 			shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
-			//zexpsel = 1;
+
 		end else begin                 	// d >= p+3
 			// addend anchored case with saturated shift
 			sumshiftE = 0;	
@ -89,15 +80,9 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
 			tE = zzeroE ? 0 : {shift[215:52]};
 			bsE = |(shift[51:0]);
 			killprodE = 1;
 			//ps = 1;
 			//zexpsel = 1;
 		// use some behavioral code to find sticky bit.  This is really
 		// done by hardware in the shifter.
 		//if (aligncntE < 0)
 		//	for (i=0; i<-aligncntE-52;  i = i+1)
 		//		bsE = bsE || z2[i];
 		end 
 	end
 endmodule
--- a/wally-pipelined/src/fpu/bk15.sv
+++ b/wally-pipelined/src/fpu/bk15.sv
@ -31,6 +31,11 @@ module kogge_stone (h, c, p, g);
   output [15:1] h;
   output [15:1] c;
   logic H_1_0,H_2_1,I_2_1,H_3_2,I_3_2,H_4_3,I_4_3,H_5_4,I_5_4,H_6_5,I_6_5,H_7_6,I_7_6,H_8_7,I_8_7,H_9_8,I_9_8,H_10_9
      ,I_10_9,H_11_10,I_11_10,H_12_11,I_12_11,H_13_12,I_13_12,H_14_13,I_14_13,H_2_0,H_3_0,H_4_1,I_4_1,H_5_2,I_5_2,H_6_3
      ,I_6_3,H_7_4,I_7_4,H_8_5,I_8_5,H_9_6,I_9_6,H_10_7,I_10_7,H_11_8,I_11_8,H_12_9,I_12_9,H_13_10,I_13_10,H_14_11,I_14_11
      ,H_4_0,H_5_0,H_6_0,H_7_0,H_8_1,I_8_1,H_9_2,I_9_2,H_10_3,I_10_3,H_11_4,I_11_4,H_12_5,I_12_5,H_13_6,I_13_6,H_14_7
      ,I_14_7,H_8_0,H_9_0,H_10_0,H_11_0,H_12_0,H_13_0,H_14_0;
   // parallel-prefix, Kogge-Stone
--- a/wally-pipelined/src/fpu/booth.sv
+++ b/wally-pipelined/src/fpu/booth.sv
@ -22,9 +22,6 @@ module booth(xExt, choose, add1, e, pp);
        3'b100 : pp = {negx, 1'b0};  // -2
        3'b101 : pp = {1'b1, negx};  // -1
        3'b110 : pp = {1'b1, negx};  // -1
        // *** <Thomas Fleming> I changed this to fix a lint error. '1 should
        // fill the signal with all ones.
        // 3'b111 : pp = 55'hfffffffffffffff;
        3'b111 : pp = '1;  //  -0
    endcase
--- a/wally-pipelined/src/fpu/cla12.sv
+++ b/wally-pipelined/src/fpu/cla12.sv
@ -9,7 +9,7 @@ module cla12 (S, CO, X, Y);
   output [11:0] S;
   output 	 CO;
-   wire [0:63] 	 A,B,Q;
+   wire [63:0] 	 A,B,Q;//***KEP was 0:63 - changed due to lint warning
   wire 	 LOGIC0;
   wire 	 CIN;
   wire 	 CO_64;
@ -174,10 +174,11 @@ module cla_sub12 (S, X, Y);
   output [11:0] S;
-   wire [0:63] 	 A,B,Q,Bbar;
+   wire [63:0] 	 A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning
   wire 	 CO;
   wire 	 LOGIC0;
   wire 	 VDD;
   logic CO_12;
   assign Bbar = ~B;
   assign LOGIC0 = 0;
--- a/wally-pipelined/src/fpu/cla52.sv
+++ b/wally-pipelined/src/fpu/cla52.sv
@ -9,7 +9,7 @@ module cla52 (S, CO, X, Y);
   output [51:0] S;
   output 	 CO;
-   wire [0:63] 	 A,B,Q;
+   wire [63:0] 	 A,B,Q;//***KEP was 0:63 - changed due to lint warning
   wire 	 LOGIC0;
   wire 	 CIN;
   wire 	 CO_64;
@ -211,7 +211,7 @@ module cla_sub52 (S, X, Y);
   output [51:0] S;
-   wire [0:63] 	 A,B,Q,Bbar;
+   wire [63:0] 	 A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning
   wire 	 LOGIC0;
   wire 	 CIN;
   wire 	 CO_52;
--- a/wally-pipelined/src/fpu/cla64.sv
+++ b/wally-pipelined/src/fpu/cla64.sv
@ -9,7 +9,7 @@ module cla64 (S, X, Y, Sub);
   input 	 Sub;
   output [63:0] S;
   wire 	 CO;
-   wire [0:63] 	 A,B,Q, Bbar;
+   wire [63:0] 	 A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning
   DBLCADDER_64_64 U1 (A , Bbar , Sub , Q , CO );
   assign A[0] = X[0];
@ -220,7 +220,7 @@ module cla_sub64 (S, X, Y);
   wire 	 CO;
   wire 	 VDD = 1'b1;
-   wire [0:63] 	 A,B,Q, Bbar;
+   wire [63:0] 	 A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning
   DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO );
   assign A[0] = X[0];
--- a/wally-pipelined/src/fpu/divconv.sv
+++ b/wally-pipelined/src/fpu/divconv.sv
@ -42,6 +42,8 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
   logic [63:0] 	d2, n2;   
   logic [11:0] 	d3;  
   logic cout1, cout2, cout3, cout4, cout5, cout6, cout7, muxr_out; 
   // Check if exponent is odd for sqrt
   // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
   assign d2 = (exp_odd&op_type) ? {vss,d,10'h0} : {d,11'h0};
--- a/wally-pipelined/src/fpu/expgen1.sv
+++ b/wally-pipelined/src/fpu/expgen1.sv
@ -50,7 +50,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 	//   if exponent is out of bounds 
-	assign aeE = xzeroE|yzeroE ? 0 : xexp + yexp -1023;
+	assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;
 	assign prodof = (aeE > 2046 && ~aeE[12]);
@ -61,7 +61,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 	// is masked by the bypass mux and two 10 bit adder delays.
 	// assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
 	// assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
-	assign aligncntE = zexp -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
+	assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
 	//assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
 	//assign aligncntE = zexp - aeE;// KEP use all of aeE
@ -87,3 +87,4 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
 	// rounding mode.  NaNs are propagated or generated.
 endmodule
--- a/wally-pipelined/src/fpu/expgen2.sv
+++ b/wally-pipelined/src/fpu/expgen2.sv
@ -102,6 +102,7 @@ module expgen2(xexp, yexp, zexp,
 	// A mux selects the early result from other FPU blocks or the 
 	// normalized FMAC result.   Special cases are also detected. 
-	assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1; 
+	assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1}; 
 endmodule
--- a/wally-pipelined/src/fpu/fctrl.sv
+++ b/wally-pipelined/src/fpu/fctrl.sv
@ -3,20 +3,23 @@ module fctrl (
  input  logic [6:0] Funct7D,
  input  logic [6:0] OpD,
  input  logic [4:0] Rs2D,
-  input  logic [4:0] Rs1D,
+  input  logic [2:0] Funct3D,
-  input  logic [2:0] FrmW,
+  input  logic [2:0] FRM_REGW,
-  output logic       WriteEnD,
+  output logic       IllegalFPUInstrD,
  output logic       FRegWriteD,
  output logic       DivSqrtStartD,
  //output logic [2:0] regSelD,
-  output logic [2:0] WriteSelD,
+  output logic [2:0] FResultSelD,
  output logic [3:0] OpCtrlD,
  output logic       FmtD,
  output logic [2:0] FrmD,
  output logic       WriteIntD);
  //precision is taken directly from instruction
  assign FmtD = Funct7D[0];
  // *** fix rounding for dynamic rounding
  assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
  //all subsequent logic is based on the table present
  //in Section 5 of Wally Architecture Specification
@ -29,59 +32,75 @@ module fctrl (
 	//in case of errors
 	case(OpD)
 		//fp instructions sans load
-		7'b1010011 : begin isFP = 1'b1; isFPLD = 1'b0; end
+		7'b1010011 : isFP = 1'b1;
-		7'b1000011 : begin isFP = 1'b1; isFPLD = 1'b0; end
+		7'b1000011 : isFP = 1'b1;
-		7'b1000111 : begin isFP = 1'b1; isFPLD = 1'b0; end
+		7'b1000111 : isFP = 1'b1;
-		7'b1001011 : begin isFP = 1'b1; isFPLD = 1'b0; end
+		7'b1001011 : isFP = 1'b1;
-		7'b1001111 : begin isFP = 1'b1; isFPLD = 1'b0; end
+		7'b1001111 : isFP = 1'b1;
-		7'b0100111 : begin isFP = 1'b1; isFPLD = 1'b0; end
+		7'b0100111 : isFP = 1'b1;
-		//fp load	
+		7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111
-		7'b1010011 : begin isFP = 1'b1; isFPLD = 1'b1; end
+		default    : isFP = 1'b0;
 		default : begin isFP = 1'b0; isFPLD = 1'b0; end
 	endcase
  end
-  assign WriteEnD = isFP & ~isFPLD; 
+
  //useful intermediary signals
  //
  //(mult only not supported in current datapath)
  //set third FMA operand to zero in this case
  //(or equivalent)
  logic isAddSub, isFMA, isMult, isDivSqrt, isCvt, isCmp, isFPSTR;
  always_comb begin
+	// Decode which FPU unit's result is selected (FResultSelD) from the
+	// opcode and funct7/funct3 fields.  Every path through this block must
+	// assign FResultSelD so that no latch is inferred; encodings that are
+	// not decoded get 3'bxxx.
 	//checks all but FMA/store/load
 	if(OpD == 7'b1010011) begin
-  		case(Funct7D)
+  		casez(Funct7D)
 			//compare	
-			7'b10100?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b1; isFPSTR = 1'b0; end
+			7'b10100?? : FResultSelD = 3'b001;
 			//div/sqrt
-			7'b0?011?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b1; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b0?011?? : FResultSelD = 3'b000;
 			//add/sub
-			7'b0000??? : begin isAddSub = 1'b1; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b0000??? : FResultSelD = 3'b100;
 			//mult
-			7'b00010?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b1; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b00010?? : FResultSelD = 3'b010;
 			//convert (not precision)
-			7'b110?0?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b1; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b110?0?? : FResultSelD = 3'b100;
 			//convert (precision)
-			7'b010000? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b1; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b010000? : FResultSelD = 3'b100;
 			//Min/Max
 			7'b00101?? : FResultSelD = 3'b001;
 			//sign injection
 			7'b00100?? : FResultSelD = 3'b011;
 			//classify //only if funct3 = 001 
 			7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
 			//output ReadData1
                   else if (Funct7D[1] == 0) FResultSelD = 3'b111;
+			//any other 11100?? encoding is not decoded; assign x so the
+			//block stays purely combinational (no inferred latch)
+                  else FResultSelD = 3'bxxx;
 			//output SrcW
 			7'b111100? : FResultSelD = 3'b110;
 			default    : FResultSelD = 3'bxxx;
 		endcase
 	end
 	//FMA/store/load
 	else begin
  		case(OpD)
 			//4 FMA instructions
-			7'b1000011 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b1000011 : FResultSelD = 3'b010;
-			7'b1000111 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b1000111 : FResultSelD = 3'b010;
-			7'b1001011 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b1001011 : FResultSelD = 3'b010;
-			7'b1001111 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
+			7'b1001111 : FResultSelD = 3'b010;
-			//store (load already found)
+			//store
-			7'b0100111 : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b1; end
+			7'b0100111 : FResultSelD = 3'b111;
 			//load
 			7'b0000111 : FResultSelD = 3'b111;
 			default    : FResultSelD = 3'bxxx;
 		endcase
 	end
  end
  //register is chosen based on operation performed
  //---- 
  //write selection is chosen in the same way as 
@ -90,26 +109,21 @@ module fctrl (
  // reg/write sel logic and assignment
  // 
-  // 3'b000 = add/sub/cvt
+  // 3'b000 = div/sqrt
-  // 3'b001 = sign
+  // 3'b001 = cmp
-  // 3'b010 = fma
+  // 3'b010 = fma/mult
-  // 3'b011 = cmp
+  // 3'b011 = sgn inj
-  // 3'b100 = div/sqrt
+  // 3'b100 = add/sub/cnvt
  // 3'b101 = classify
  // 3'b110 = output SrcAW
  // 3'b111 = output ReadData1
  //
  //reg select
  //this value is used enough to be shorthand
  logic isSign;
  assign isSign = ~Funct7D[6] & ~Funct7D[5] & Funct7D[4] & ~Funct7D[3] & ~Funct7D[2];
  //write select
  assign WriteSelD[2] = isDivSqrt & ~isFMA;
  assign WriteSelD[1] = isFMA | isCmp;
  //AND of Funct7 for sign
  assign WriteSelD[0] = isCmp | isSign;
  //if op is div/sqrt - start div/sqrt
-  assign DivSqrtStartD = isDivSqrt & ~isFMA;
+  assign DivSqrtStartD = ~|FResultSelD; // is FResultSelD == 000
  //operation control for each fp operation
  //has to be expanded over standard to account for
@ -126,23 +140,74 @@ module fctrl (
 //
 //
-  //add/cvt chooses unsigned conversion here
+
-  assign OpCtrlD[3] = (isAddSub & Rs2D[0]) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0);
+ 
-  //add/cvt chooses FP/int or int/FP conversion 
+  always_comb begin
-  assign OpCtrlD[2] = (isAddSub & (Funct7D[6] & Funct7D[5] & ~Funct7D[4])) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0);
+    IllegalFPUInstrD = 0;
-  //compare chooses equals
+    case (FResultSelD)
-  //sign chooses sgnjx
+      // div/sqrt
-  //add/cvt can chooses between abs/neg functions, but they aren't used in the
+      //  fdiv  = ???0
-  //wally-spec
+      //  fsqrt = ???1
-  assign OpCtrlD[1] = (isAddSub & 1'b0) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & FrmW[2]) | (isSign & FrmW[1]);
+      3'b000 : OpCtrlD = {3'b0, Funct7D[5]};
-  //divide chooses between div/sqrt
+      // cmp		
-  //compare chooses between LT and LE
+      //  fmin = ?100
-  //sign chooses between sgnj and sgnjn
+      //  fmax = ?101
-  //add/cvt chooses between add/sub or single-precision conversion
+      //  feq  = ?010
-  assign OpCtrlD[0] = (isAddSub & (Funct7D[2] | Funct7D[0])) | (isFMA & 1'b0) | (isDivSqrt & Funct7D[5]) | (isCmp & FrmW[1]) | (isSign & FrmW[0]);
+      //  flt  = ?001
      //  fle  = ?011
      //		   {?,    is min or max, is eq or le, is lt or le}
      3'b001 : OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])};
      //fma/mult	
      //  fmadd  = ?000
      //  fmsub  = ?001
      //  fnmadd = ?010
      //  fnmsub = ?011
      //  fmul   = ?100
      //		  {?, is mul, is negative, is sub}
      3'b010 : OpCtrlD = {1'b0, OpD[4:2]};
      // sgn inj
      //  fsgnj  = ??00
      //  fsgnjn = ??01
      //  fsgnjx = ??10
      3'b011 : OpCtrlD = {2'b0, Funct3D[1:0]};
      // add/sub/cnvt
      //  fadd      = 0000
      //  fsub      = 0001
      //  fcvt.w.s  = 0100
      //  fcvt.wu.s = 0101
      //  fcvt.s.w  = 0110
      //  fcvt.s.wu = 0111
      //  fcvt.s.d  = 0010
      //  fcvt.w.d  = 1100
      //  fcvt.wu.d = 1101
      //  fcvt.d.w  = 1110
      //  fcvt.d.wu = 1111
      //  fcvt.d.s  = 1000
      //		   { is double and not add/sub, is to/from int, is to int or float to double,      is unsigned or sub
      3'b100 : OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])};
      // classify	  {?, ?, ?, ?}
      3'b101 : OpCtrlD = 4'b0;
      // output SrcAW
      //  fmv.w.x = ???0
      //  fmv.w.d = ???1
      3'b110 : OpCtrlD = {3'b0, Funct7D[0]};
      // output ReadData1
      //  flw       = ?000
      //  fld       = ?001
      //  fsw       = ?010
      //  fsd       = ?011
      //  fmv.x.w  = ?100
      //  fmv.d.w  = ?101
      //		   {?, is mv, is store, is double or fcvt.d.w}
      3'b111 : OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])};
      default : begin OpCtrlD = 4'bxxxx; IllegalFPUInstrD = isFP; end
    endcase
  end
  //write to integer source if conv to int occurs
  //AND of Funct7 for int results 
-  assign WriteIntD = isCvt & (Funct7D[6] & Funct7D[5] & ~Funct7D[4] & ~Funct7D[3] & ~Funct7D[2] & ~Funct7D[1]);
+  // WriteIntD is set when the result is written to the integer register file:
+  //   - add/cvt unit (3'b100) and a float-to-int convert: Funct7D[6] set and
+  //     Funct7D[3] clear (Funct7D[3] set is int-to-float, which writes an FP register)
+  //   - classify (3'b101)
+  //   - cmp unit (3'b001) and not min/max
+  //   - output ReadData1 (3'b111) with the OP-FP opcode, i.e. a move to the integer file
-
+  assign WriteIntD = ((FResultSelD == 3'b100)&Funct7D[6]&~Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]);
  // 		      if not writing to int reg and not a store function and not move
  assign FRegWriteD = ~WriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]);
 endmodule
--- a/wally-pipelined/src/fpu/flag1.sv
+++ b/wally-pipelined/src/fpu/flag1.sv
@ -21,7 +21,7 @@ module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE);
 	output logic				prodinfE;	// X*Y larger than max possible
-	// If any input is NaN, propagate the NaN 
+	// If any input is NaN, propagate the NaN 
 	assign nanE = xnanE || ynanE || znanE;
--- a/wally-pipelined/src/fpu/flag2.sv
+++ b/wally-pipelined/src/fpu/flag2.sv
@ -55,8 +55,8 @@ logic suminf;
 	assign FmaFlagsM[2] = suminf && ~inf;
 	// Set the underflow  flag for the following cases:
-	//   1) Any input is denormalized
+	//   1) Any input is denormalized
-	//   2)  Output would be denormalized or smaller
+	//   2)  Output would be denormalized or smaller
 	assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));
@ -70,7 +70,7 @@ logic suminf;
 	// Set invalid flag for following cases:
 	//   1) Inf - Inf
 	//   2) 0 * Inf
-	//   3) Output = NaN (this is not part of the IEEE spec,  only 486 proj)
+	//   3) Output = NaN (this is not part of the IEEE spec,  only 486 proj)
 	assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
 					   xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above
--- a/wally-pipelined/src/fpu/fma1.sv
+++ b/wally-pipelined/src/fpu/fma1.sv
@ -34,7 +34,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
 			, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
 			xinfE, yinfE, zinfE, nanE, prodinfE);
 /////////////////////////////////////////////////////////////////////////////
- //***clean up code, comment, fix names, and c3f000200003fffe * 0000000000000001 + 001ffffffffffffe error
+ 
 	input logic 		[63:0]		ReadData1E;		// input 1
 	input logic		[63:0]		ReadData2E;     // input 2 
 	input logic 		[63:0]		ReadData3E;     // input 3
@ -42,7 +42,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
 	output logic 		[12:0]		aligncntE;    	// status flags
 	output logic 		[105:0]		rE; 				// one result of partial product sum
 	output logic 		[105:0]		sE; 				// other result of partial products
-	output logic 		[163:0]		tE;				// output of alignment shifter	
+	output logic 		[163:0]		tE;				// output logic of alignment shifter	
 	output logic 		[12:0]		aeE; 		// multiplier expoent
 	output logic 					bsE;				// sticky bit of addend
 	output logic 					killprodE; 		// ReadData3E >> product
@ -65,7 +65,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
 // Internal nodes
-//	output 		[12:0]		aligncntE; 		// shift count for alignment
+//	output logic 		[12:0]		aligncntE; 		// shift count for alignment
 	logic 					prodof; 		// ReadData1E*ReadData2E out of range
@ -95,7 +95,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
 	special			special(.*);
-// Instantiate control output
+// Instantiate control output logic
 flag1				flag1(.*); 
--- a/wally-pipelined/src/fpu/fma2.sv
+++ b/wally-pipelined/src/fpu/fma2.sv
@ -15,13 +15,13 @@
 //    normalize Normalization shifter
 //    round     Rounding of result
 //    exception Handles exceptional cases
-//    bypass    Handles bypass of result to ReadData1M or ReadData3M inputs
+//    bypass    Handles bypass of result to ReadData1M or ReadData3M inputs
 //    sign      One bit sign handling block 
-//    special   Catch special cases (inputs = 0  / infinity /  etc.) 
+//    special   Catch special cases (inputs = 0  / infinity /  etc.) 
 //
 //   The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
 //   RN, RZ, RM, or RP.  The result is optionally bypassed back to
-//   the ReadData1M or ReadData3M inputs for use on the next cycle.  In addition,  four signals
+//   the ReadData1M or ReadData3M inputs for use on the next cycle.  In addition,  four signals
 //   are produced: trap, overflow, underflow, and inexact.  Trap indicates
 //   an infinity, NaN, or denormalized number to be handled in software;
 //   the other three signals are IEEE flags.
@ -39,9 +39,9 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
 );
 /////////////////////////////////////////////////////////////////////////////
-	input logic	 	[63:0]		ReadData1M;		// input 1
+	input logic 		[63:0]		ReadData1M;		// input logic 1
-	input logic	 	[63:0]		ReadData2M;     // input 2 
+	input logic		[63:0]		ReadData2M;     // input logic 2 
-	input logic		[63:0]		ReadData3M;     // input 3
+	input logic 		[63:0]		ReadData3M;     // input logic 3
 	input logic 		[2:0]	 	FrmM;          	// Rounding mode
 	input logic 		[12:0]		aligncntM;    	// status flags
 	input logic 		[105:0]		rM; 				// one result of partial product sum
@ -67,9 +67,12 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
 	input logic					nanM;
 	input logic			[8:0]		sumshiftM;
 	input logic					sumshiftzeroM;
 	output logic 		[63:0]		FmaResultM;     // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
 	output logic 		[4:0]		FmaFlagsM;    	// status flags
 // Internal nodes
 	logic 		[163:0]		sum;			// output of carry prop adder
 	logic 		[53:0]		v; 				// normalized sum, R, S bits
--- a/wally-pipelined/src/fpu/fpdiv.sv
+++ b/wally-pipelined/src/fpu/fpdiv.sv
@ -84,13 +84,14 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
   wire [127:0]  regr_out;
   wire [2:0] 	 sel_muxa, sel_muxb;
   wire 	 sel_muxr;   
-   wire 	 load_rega, load_regb, load_regc, load_regd, load_regr;
+   wire 	 load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
   wire 	 donev, sel_muxrv, sel_muxsv;
   wire [1:0] 	 sel_muxav, sel_muxbv;   
   wire 	 load_regav, load_regbv, load_regcv;
   wire 	 load_regrv, load_regsv;
   logic exp_cout1, exp_cout2, exp_odd, open;
   // Convert the input operands to their appropriate forms based on 
   // the original operands, the DivOpType , and their precision DivP. 
   // Single precision inputs are converted to double precision 
@ -138,7 +139,7 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
   // FSM : control divider
   fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd, 
 		load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, 
-		clk, reset, DivStart, error, DivOpType);
+		clk, reset, DivStart, DivOpType);
   // Round the mantissa to a 52-bit value, with the leading one
   // removed. The rounding units also handles special cases and 
@ -191,6 +192,9 @@ module brent_kung (c, p, g);
   input [13:0] g;
   output [14:1] c;
   logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
   logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
   logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
   // parallel-prefix, Brent-Kung
   // Stage 1: Generates G/P pairs that span 1 bits
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -1,19 +1,22 @@
 `include "wally-config.vh"
 //  `include "../../config/rv64icfd/wally-config.vh" //debug
 module fpu (
  //input  logic [2:0]       FrmD,
  input  logic [2:0]       FRM_REGW,    // Rounding mode from CSR
  input  logic             reset,
-  //input  logic             clear,     // *** what is this used for?
+  //input  logic             clear,     // *** not being used anywhere
  input  logic             clk,
  input  logic [31:0]      InstrD,
  input  logic [`XLEN-1:0] SrcAE,       // Integer input being processed
  input  logic [`XLEN-1:0] SrcAM,       // Integer input being written into fpreg
  input  logic 		   StallE, StallM, StallW,
  input  logic             FlushE, FlushM, FlushW,
  output logic [4:0]       SetFflagsM,
  output logic [31:0]      FSROutW,
  output logic             DivSqrtDoneE,
-  output logic             FInvalInstrD,
+  output logic             IllegalFPUInstrD,
  output logic [`XLEN-1:0] FPUResultW);
  //NOTE:
@ -45,12 +48,12 @@ module fpu (
  localparam PipeEnable = 1'b1;
  always_comb begin
+	  // The pipeline-register enables are active-high: the previous code tied
+	  // them to the constant PipeEnable = 1'b1 to keep the pipe always moving.
+	  // A stage register must therefore advance when its stage is NOT stalled,
+	  // so the enables are the inverted stall signals.
-	  PipeEnableDE = PipeEnable;
+	  PipeEnableDE = ~StallE;
-	  PipeEnableEM = PipeEnable;
+	  PipeEnableEM = ~StallM;
-	  PipeEnableMW = PipeEnable;
+	  PipeEnableMW = ~StallW;
+	  // Each stage register is cleared by that stage's flush request.
-	  PipeClearDE = PipeClear;
+	  PipeClearDE = FlushE;
-	  PipeClearEM = PipeClear;
+	  PipeClearEM = FlushM;
-	  PipeClearMW = PipeClear;
+	  PipeClearMW = FlushW;
  end
@ -63,33 +66,33 @@ module fpu (
  //
  //wally-spec D stage control logic signal instantiation
  logic                    IllegalFPUInstrFaultD;
  logic                    FRegWriteD;
  logic [2:0]              FResultSelD;
  logic [2:0]              FrmD;
-  logic                    PD;
+  logic                    FmtD;
  logic                    DivSqrtStartD;
  logic [3:0]              OpCtrlD;
  logic                    WriteIntD;
  //top-level controller for FPU
-  fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Rs1D(InstrD[19:15]), .FrmW(InstrD[14:12]), .WriteEnD(FRegWriteD), .WriteSelD(FResultSelD), .FmtD(PD), .*);
+  fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
  //instantiation of D stage regfile signals (includes some W stage signals
  //for easy reference)
  logic [2:0]              FrmW;
-  logic                    WriteEnW;
+  logic                    FmtW;
  logic                    FRegWriteW;
  logic [4:0]              RdW, Rs1D, Rs2D, Rs3D;
  logic [`XLEN-1:0]        WriteDataW;
  logic [63:0] FPUResultDirW; 
  logic [`XLEN-1:0]        ReadData1D, ReadData2D, ReadData3D; 
  //regfile instantiation
-  freg3adr fpregfile (FrmW, reset, PipeClear, clk, RdW, WriteEnW, Rs1D, Rs2D, Rs3D, WriteDataW, ReadData1D, ReadData2D, ReadData3D);
+  freg3adr fpregfile (FmtW, reset, PipeClear, clk, RdW, FRegWriteW, InstrD[19:15], InstrD[24:20], InstrD[31:27], FPUResultDirW, ReadData1D, ReadData2D, ReadData3D);
  always_comb begin
     FrmW = InstrD[14:12];
  end
  //always_comb begin
  //   FrmW = InstrD[14:12];
  //end
  //
  //END DECODE STAGE
  //#########################################
@ -102,7 +105,7 @@ module fpu (
  logic                    FRegWriteE;
  logic [2:0]              FResultSelE;
  logic [2:0]              FrmE;
-  logic                    PE;
+  logic                    FmtE;
  logic                    DivSqrtStartE;
  logic [3:0]              OpCtrlE;
@ -187,9 +190,10 @@ module fpu (
  flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE);
  flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
  flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
-  flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, PD, PE);
+  flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
-  flopenrc #(4) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
+  flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
-  flopenrc #(1) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
+  flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
  flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
  //
  //END D/E PIPE
@ -205,10 +209,10 @@ module fpu (
  fpdiv fpdivsqrt (.*);
  //first of two-stage instance of floating-point add/cvt unit
-  fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, AddOp1E, AddOp2E, AddRmE, AddOpTypeE, AddPE, AddOvEnE, AddUnEnE);
+  fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, ReadData1E, ReadData2E, FrmE, OpCtrlE, FmtE);
  //first of two-stage instance of floating-point comparator
-  fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, CmpOp1E, CmpOp2E, CmpSelE);
+  fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, ReadData1E, ReadData2E, OpCtrlE[1:0]);
  //first and only instance of floating-point sign converter
  fpusgn fpsgn (.*);
@ -221,33 +225,33 @@ module fpu (
  //truncate to 64 bits
  //(causes warning during compilation - case never reached) 
-  if(`XLEN > 64) begin
+//   if(`XLEN > 64) begin // ***KEP this isn't used and it causes a lint error
-        DivOp1 <= ReadData1E[`XLEN-1:`XLEN-64];
+//         DivOp1 = ReadData1E[`XLEN-1:`XLEN-64];
-	DivOp2 <= ReadData2E[`XLEN-1:`XLEN-64];
+// 	DivOp2 = ReadData2E[`XLEN-1:`XLEN-64];
-        AddOp1E <= ReadData1E[`XLEN-1:`XLEN-64];
+//         AddOp1E = ReadData1E[`XLEN-1:`XLEN-64];
-	AddOp2E <= ReadData2E[`XLEN-1:`XLEN-64];
+// 	AddOp2E = ReadData2E[`XLEN-1:`XLEN-64];
-        CmpOp1E <= ReadData1E[`XLEN-1:`XLEN-64];
+//         CmpOp1E = ReadData1E[`XLEN-1:`XLEN-64];
-	CmpOp2E <= ReadData2E[`XLEN-1:`XLEN-64];
+// 	CmpOp2E = ReadData2E[`XLEN-1:`XLEN-64];
-        SgnOp1E <= ReadData1E[`XLEN-1:`XLEN-64];
+//         SgnOp1E = ReadData1E[`XLEN-1:`XLEN-64];
-	SgnOp2E <= ReadData2E[`XLEN-1:`XLEN-64];
+// 	SgnOp2E = ReadData2E[`XLEN-1:`XLEN-64];
-  end
+//   end
-  //zero extend to 64 bits
+//   //zero extend to 64 bits
-  else begin
+//   else begin
-        DivOp1 <= {ReadData1E,{64-`XLEN{1'b0}}};
+//         DivOp1 = {ReadData1E,{64-`XLEN{1'b0}}};
-	DivOp2 <= {ReadData2E,{64-`XLEN{1'b0}}};
+// 	DivOp2 = {ReadData2E,{64-`XLEN{1'b0}}};
-        AddOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
+//         AddOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
-	AddOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
+// 	AddOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
-        CmpOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
+//         CmpOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
-	CmpOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
+// 	CmpOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
-        SgnOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
+//         SgnOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
-	SgnOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
+// 	SgnOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
-  end
+//   end
  //assign op codes
-  AddOpTypeE[3:0] <= OpCtrlE[3:0];
+  AddOpTypeE[3:0] = OpCtrlE[3:0];
-  CmpSelE[1:0] <= OpCtrlE[1:0];
+  CmpSelE[1:0] = OpCtrlE[1:0];
-  DivOpType <= OpCtrlE[0];
+  DivOpType = OpCtrlE[0];
-  SgnOpCodeE[1:0] <= OpCtrlE[1:0];
+  SgnOpCodeE[1:0] = OpCtrlE[1:0];
  end 
@ -266,7 +270,7 @@ module fpu (
  logic                    FRegWriteM;
  logic [2:0]              FResultSelM;
  logic [2:0]              FrmM;
-  logic                    PM;
+  logic                    FmtM;
  logic [3:0]              OpCtrlM;
  //instantiate M stage FMA signals here ***rename fma signals and resize for XLEN
@ -340,17 +344,17 @@ module fpu (
  flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); 
  flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); 
  flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); 
-  flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); 
+  flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); 
-  flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); 
+  flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); 
  flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); 
  flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); 
  flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); 
  flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); 
-  flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); 
+  flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); 
-  flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); 
+  flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); 
  flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); 
-  flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); 
+  flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); 
-  flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); 
+  flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); 
  flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); 
  flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); 
  flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); 
@ -414,8 +418,9 @@ module fpu (
  flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM);
  flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
  flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
-  flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, PE, PM);
+  flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
-  flopenrc #(4) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
+  flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
  flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
  //
  //END E/M PIPE
@ -443,9 +448,7 @@ module fpu (
  //
  //wally-spec W stage control logic signal instantiation
  logic                    FRegWriteW;
  logic [2:0]              FResultSelW;
  logic                    PW;
  //instantiate W stage fma signals here
  logic [63:0]             FmaResultW;
@ -470,9 +473,14 @@ module fpu (
  logic                    AddDenormW;
  //instantiation of W stage cmp signals
  logic [63:0]             CmpResultW;
  logic                    CmpInvalidW;
  logic [1:0]              CmpFCCW; 
  //instantiation of W stage classify signals
  logic [63:0]             ClassResultW;
  logic [4:0]              ClassFlagsW;
  //*****************
  //fma M/W pipe registers
  //*****************
@ -510,7 +518,9 @@ module fpu (
  //*****************
  flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW);
  flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
-  flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, PM, PW);
+  flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
  flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
  flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
  ////END M/W PIPE
  //*****************************************
@ -527,21 +537,61 @@ module fpu (
  //set to cmp flags
  //iff bit one is low - if bit zero is active set to add/cvt flags - otherwise
  //set to div/sqrt flags
-  assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
+  //assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
-	             (FResultSelW[1]) ? 
+//	             (FResultSelW[1]) ? 
-		     ( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) ) 
+//		     ( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) ) 
-		     : ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) ) 
+//		     : ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) ) 
-                     );
+//                     );
  // W-stage exception-flag mux.
  // FResultSelW selects which unit's flag vector drives FPUFlagsW; the
  // encoding matches the one used by the FPUResultDirW result mux.
  always_comb begin
 	case (FResultSelW)
 		3'b000         : FPUFlagsW = DivFlagsW;               // div/sqrt
 		3'b001         : FPUFlagsW = {CmpInvalidW, 4'b0000};  // cmp: only the invalid bit can be set
 		3'b010         : FPUFlagsW = FmaFlagsW;               // fma/mult
 		3'b011         : FPUFlagsW = SgnFlagsW;               // sign injection
 		3'b100         : FPUFlagsW = AddFlagsW;               // add/sub/convert
 		3'b101         : FPUFlagsW = ClassFlagsW;             // classify
 		// selections that forward SrcAW (110) or ReadData1 (111) raise no flags
 		3'b110, 3'b111 : FPUFlagsW = 5'b00000;
 		// only reachable when the select contains x/z bits
 		default        : FPUFlagsW = 5'bx;
 	endcase
  end
  //result mux via a case statement on FResultSelW
  logic [63:0] FPUResultDirW; 
  //this uses the same select logic as the flag signals
-  assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
+  //assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
-	             (FResultSelW[1]) ? 
+  //	             (FResultSelW[1]) ? 
-		     ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) ) 
+  //		     ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) ) 
-		     : ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) ) 
+  //		     : ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) ) 
-                     );
+  //                   );
-
+  always_comb begin
 	// W-stage result mux: FResultSelW chooses which unit's result is
 	// forwarded on FPUResultDirW.
 	case (FResultSelW)
 		3'b000  : FPUResultDirW = DivResultW;    // div/sqrt
 		3'b001  : FPUResultDirW = CmpResultW;    // cmp
 		3'b010  : FPUResultDirW = FmaResultW;    // fma/mult
 		3'b011  : FPUResultDirW = SgnResultW;    // sign injection
 		3'b100  : FPUResultDirW = AddResultW;    // add/sub/convert
 		3'b101  : FPUResultDirW = ClassResultW;  // classify
 		3'b110  : FPUResultDirW = SrcAW;         // forward SrcAW
 		3'b111  : FPUResultDirW = ReadData1W;    // forward ReadData1
 		// only reachable when the select contains x/z bits
 		default : FPUResultDirW = 64'bx;
 	endcase
  end
  //interface between XLEN size datapath and double-precision sized
  //floating-point results
  //
@ -555,11 +605,12 @@ module fpu (
 // Repetition multiplier must be constant.
  //if(`XLEN > 64) begin
-  //    FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
+  //    FPUResultW = {FPUResultDirW,{XLENDIFF{1'b0}}};
  //end
  //truncate
  //else begin
-      FPUResultW <= FPUResultDirW[63:64-`XLEN];
+      FPUResultW = FPUResultDirW[63:64-`XLEN];
      SetFflagsM = FPUFlagsW;
  //end
  end  
--- a/wally-pipelined/src/fpu/fpuaddcvt1.sv
+++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv
@ -27,18 +27,16 @@
 //
-module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin, OvEn, UnEn);
+module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin);
-   input [63:0] op1;		// 1st input operand (A)
+   input logic [63:0] op1;		// 1st input operand (A)
-   input [63:0] op2;		// 2nd input operand (B)
+   input logic [63:0] op2;		// 2nd input operand (B)
-   input [2:0] 	rm;		// Rounding mode - specify values 
+   input logic [2:0] 	rm;		// Rounding mode - specify values 
-   input [3:0]	op_type;	// Function opcode
+   input logic [3:0]	op_type;	// Function opcode
-   input 	Pin;   		// Result Precision (0 for double, 1 for single)
+   input logic 	Pin;   		// Result Precision (1 for double, 0 for single)
   input 	OvEn;		// Overflow trap enabled
   input 	UnEn;   	// Underflow trap enabled
   wire          P;
-   assign P = Pin | op_type[2];
+   assign P = ~Pin | op_type[2];
   wire [63:0] 	 IntValue;
   wire [11:0] 	 exp1, exp2;
@ -56,23 +54,23 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   wire 	 zeroB;
   wire [5:0]	 align_shift; 
-   output [63:0] 	 Float1; 
+   output logic [63:0] 	 Float1; 
-   output [63:0] 	 Float2;
+   output logic [63:0] 	 Float2;
-   output [10:0] 	 exponent;
+   output logic [10:0] 	 exponent;
-   output [10:0]	 exponent_postsum;
+   output logic [10:0]	 exponent_postsum;
-   output [10:0]	 exp1_denorm, exp2_denorm;
+   output logic [11:0]	 exp1_denorm, exp2_denorm;//KEP used to be [10:0]
-   output [63:0] sum, sum_tc;
+   output logic [63:0] sum, sum_tc;
-   output [3:0]  sel_inv;
+   output logic [3:0]  sel_inv;
-   output        corr_sign;
+   output logic        corr_sign;
-   output 	 signA;
+   output logic 	 signA;
-   output	 op1_Norm, op2_Norm;
+   output logic	 op1_Norm, op2_Norm;
-   output	 opA_Norm, opB_Norm;
+   output logic	 opA_Norm, opB_Norm;
-   output	 Invalid;
+   output logic	 Invalid;
-   output 	 DenormIn;
+   output logic 	 DenormIn;
-//   output 	 exp_valid;
+//   output logic 	 exp_valid;
-   output 	 convert;
+   output logic 	 convert;
-   output        swap;
+   output logic        swap;
-   output 	 normal_overflow;
+   output logic 	 normal_overflow;
   wire [5:0]	 ZP_mantissaA;
   wire [5:0]	 ZP_mantissaB;
   wire		 ZV_mantissaA;
@ -129,15 +127,15 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
   // Denormalized exponents created by subtracting the leading zeroes from the original exponents
-   assign exp1_denorm = swap ? (exp1 - ZP_mantissaB) : (exp1 - ZP_mantissaA);
+   assign exp1_denorm = swap ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa 
-   assign exp2_denorm = swap ? (exp2 - ZP_mantissaA) : (exp2 - ZP_mantissaB);
+   assign exp2_denorm = swap ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
   // Determine the alignment shift and limit it to 63. If any bit from 
   // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones. 
   assign exp_shift = swap ? exp_diff2 : exp_diff1;
   assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9] 
     | exp_shift[8] | exp_shift[7] | exp_shift[6];
-   assign align_shift = exp_shift | {6{exp_gt63}};
+   assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
   // Unpack the 52-bit mantissas to 57-bit numbers of the form.
   //    001.M[51]M[50] ... M[1]M[0]00
@ -193,7 +191,8 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
   cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
   // Finds normal underflow result to determine whether to round final exponent down
-   assign normal_overflow = (DenormIn & (sum == 16'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
+   //***KEP used to be (sum == 16'h0) I am unsure what it's supposed to be
   assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
 endmodule // fpadd
--- a/wally-pipelined/src/fpu/fpuaddcvt2.sv
+++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv
@ -27,7 +27,7 @@
 //
-module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
+module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
   input [63:0] AddOp1M;		// 1st input operand (A)
   input [63:0] AddOp2M;		// 2nd input operand (B)
@ -51,7 +51,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   input         AddCorrSignM;
   input 	 AddConvertM;
   input          AddSwapM;
-   input 	 AddNormOvflowM;
+   // input 	 AddNormOvflowM;
   output [63:0] AddResultM;	// Result of operation
   output [4:0]  AddFlagsM;   	// IEEE exception flags 
@ -80,6 +80,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
   wire 	 Float2_sum_tc_comp;
   wire 	 normal_underflow;
   wire [63:0]   sum_corr;
   logic AddNormOvflowM;
   //AddExponentM value pre-rounding with considerations for denormalized
   //cases/conversion cases
@ -116,7 +117,8 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
 			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
   // Finds normal underflow result to determine whether to round final AddExponentM down
-   assign AddNormOvflowM = (AddDenormInM & (AddSumM == 16'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
+   //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
   assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
   // Leading-Zero Detector. Determine the size of the shift needed for
   // normalization. If sum_corrected is all zeros, the exp_valid is 
--- a/wally-pipelined/src/fpu/freg.sv
+++ b/wally-pipelined/src/fpu/freg.sv
@ -1,8 +1,9 @@
 `include "wally-config.vh"
 //  `include "../../config/rv64icfd/wally-config.vh" //debug
 module freg1adr (
-  input  logic [2:0]       frm,
+  input  logic 	       	   FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
@ -13,7 +14,7 @@ module freg1adr (
  output logic [`XLEN-1:0] readData);
  //note - not word aligning based on precision of 
-  //operation (frm)
+  //operation (FmtW)
  //reg number should remain static, but it doesn't hurt
  //to parameterize
@ -139,7 +140,7 @@ endmodule
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 module freg2adr (
-  input  logic [2:0]       frm,
+  input  logic 	           FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
@ -152,7 +153,7 @@ module freg2adr (
  output logic [`XLEN-1:0] readData2);
  //note - not word aligning based on precision of 
-  //operation (frm)
+  //operation (FmtW)
  //reg number should remain static, but it doesn't hurt
  //to parameterize
@ -310,7 +311,7 @@ endmodule
 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 module freg3adr (
-  input  logic [2:0]       frm,
+  input  logic    	   FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
@ -325,7 +326,7 @@ module freg3adr (
  output logic [`XLEN-1:0] readData3);
  //note - not word aligning based on precision of 
-  //operation (frm)
+  //operation (FmtW)
  //reg number should remain static, but it doesn't hurt
  //to parameterize
--- a/wally-pipelined/src/fpu/fsm.sv
+++ b/wally-pipelined/src/fpu/fsm.sv
@ -1,12 +1,12 @@
 module fsm (done, load_rega, load_regb, load_regc, 
 	    load_regd, load_regr, load_regs,
 	    sel_muxa, sel_muxb, sel_muxr, 
-	    clk, reset, start, error, op_type);
+	    clk, reset, start, op_type);
   input 	clk;
   input 	reset;
   input 	start;
-   input 	error;
+//    input 	error;
   input  	op_type;
   output       done;      
@ -50,9 +50,9 @@ module fsm (done, load_rega, load_regb, load_regc,
   // State register: on every rising clock edge load S0 while reset is
   // high, otherwise advance to NEXT_STATE.
   // The register is written with nonblocking assignments (<=). A blocking
   // assignment in a clocked block updates CURRENT_STATE immediately, so any
   // other process evaluated on the same edge could observe the new state
   // instead of the pre-edge one (a simulation race). Blocking assignments
   // belong only in the combinational next-state/output block.
   always @(posedge clk)
     begin
 	if(reset==1'b1)
 	  CURRENT_STATE<=S0;
 	else
 	  CURRENT_STATE<=NEXT_STATE;
     end
   always @(*)
@ -72,7 +72,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 		    sel_muxa = 3'b000;
 		    sel_muxb = 3'b000;
 		    sel_muxr = 1'b0;
-		    NEXT_STATE <= S0;
+		    NEXT_STATE = S0;
 		 end 
 	       else if (start==1'b1 && op_type==1'b0) 
 		 begin
@ -86,7 +86,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 		    sel_muxa = 3'b001;
 		    sel_muxb = 3'b001;		    
 		    sel_muxr = 1'b0;
-		    NEXT_STATE <= S1;
+		    NEXT_STATE = S1;
 		 end // if (start==1'b1 && op_type==1'b0)
 	       else if (start==1'b1 && op_type==1'b1) 
 		 begin
@ -100,7 +100,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 		    sel_muxa = 3'b010;
 		    sel_muxb = 3'b000;		    
 		    sel_muxr = 1'b0;
-		    NEXT_STATE <= S13;
+		    NEXT_STATE = S13;
 		 end 	       
 	    end // case: S0
 	  S1:
@ -115,7 +115,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b010;
 	       sel_muxb = 3'b000;		    
 	       sel_muxr = 1'b0;	
-	       NEXT_STATE <= S2;
+	       NEXT_STATE = S2;
 	    end	  
 	  S2: // iteration 1
 	    begin
@ -129,7 +129,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S3;
+	       NEXT_STATE = S3;
 	    end
 	  S3:
 	    begin
@ -143,7 +143,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S4;
+	       NEXT_STATE = S4;
 	    end
 	  S4: // iteration 2
 	    begin
@ -157,7 +157,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S5;
+	       NEXT_STATE = S5;
 	    end
 	  S5:
 	    begin
@ -171,7 +171,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;  // add
-	       NEXT_STATE <= S6;
+	       NEXT_STATE = S6;
 	    end
 	  S6: // iteration 3
 	    begin
@ -185,7 +185,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S8;
+	       NEXT_STATE = S8;
 	    end
 	  S7:
 	    begin
@ -199,7 +199,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S8;
+	       NEXT_STATE = S8;
 	    end // case: S7
 	  S8: // q,qm,qp
 	    begin
@ -213,7 +213,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S9;
+	       NEXT_STATE = S9;
 	    end 
 	  S9:  // rem
 	    begin
@ -227,7 +227,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b1;
-	       NEXT_STATE <= S10;
+	       NEXT_STATE = S10;
 	    end 	  
 	  S10:  // done
 	    begin
@ -241,7 +241,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S0;
+	       NEXT_STATE = S0;
 	    end 
 	  S13:  // start of sqrt path
 	    begin
@ -255,7 +255,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b010;
 	       sel_muxb = 3'b001;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S14;
+	       NEXT_STATE = S14;
 	    end
 	  S14:  
 	    begin
@ -269,7 +269,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b001;
 	       sel_muxb = 3'b100;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S15;
+	       NEXT_STATE = S15;
 	    end 
 	  S15:  // iteration 1
 	    begin
@ -283,7 +283,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S16;
+	       NEXT_STATE = S16;
 	    end
 	  S16:  
 	    begin
@ -297,7 +297,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S17;
+	       NEXT_STATE = S17;
 	    end
 	  S17:  
 	    begin
@ -311,7 +311,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b100;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S18;
+	       NEXT_STATE = S18;
 	    end
 	  S18:  // iteration 2
 	    begin
@ -325,7 +325,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S19;
+	       NEXT_STATE = S19;
 	    end
 	  S19:  
 	    begin
@ -339,7 +339,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S20;
+	       NEXT_STATE = S20;
 	    end
 	  S20:  
 	    begin
@ -353,7 +353,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b100;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S21;
+	       NEXT_STATE = S21;
 	    end
 	  S21:  // iteration 3
 	    begin
@ -367,7 +367,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S22;
+	       NEXT_STATE = S22;
 	    end
 	  S22:  
 	    begin
@ -381,7 +381,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S23;
+	       NEXT_STATE = S23;
 	    end
 	  S23:  
 	    begin
@ -395,7 +395,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b100;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S24;
+	       NEXT_STATE = S24;
 	    end 
 	  S24: // q,qm,qp
 	    begin
@ -409,7 +409,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S25;
+	       NEXT_STATE = S25;
 	    end 	  
 	  S25:  // rem
 	    begin
@ -423,7 +423,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b110;
 	       sel_muxr = 1'b1;
-	       NEXT_STATE <= S26;
+	       NEXT_STATE = S26;
 	    end 	  
 	  S26:  // done
 	    begin
@ -437,7 +437,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S0;
+	       NEXT_STATE = S0;
 	    end 
 	  default: 
 	    begin
@ -451,7 +451,7 @@ module fsm (done, load_rega, load_regb, load_regc,
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE <= S0;
+	       NEXT_STATE = S0;
 	    end
 	endcase // case(CURRENT_STATE)	
     end // always @ (CURRENT_STATE or X)   
--- a/wally-pipelined/src/fpu/ldf128.sv
+++ b/wally-pipelined/src/fpu/ldf128.sv
@ -31,6 +31,56 @@ module ladner_fischer128 (c, p, g);
   output [128:1] c;
  logic G_1_0, G_3_2, P_3_2, G_5_4, P_5_4, G_7_6, P_7_6, G_9_8, P_9_8, G_11_10, P_11_10, G_13_12
      , P_13_12, G_15_14, P_15_14, G_17_16, P_17_16, G_19_18, P_19_18, G_21_20, P_21_20, G_23_22
      , P_23_22, G_25_24, P_25_24, G_27_26, P_27_26, G_29_28, P_29_28, G_31_30, P_31_30, G_33_32
      , P_33_32, G_35_34, P_35_34, G_37_36, P_37_36, G_39_38, P_39_38, G_41_40, P_41_40, G_43_42
      , P_43_42, G_45_44, P_45_44, G_47_46, P_47_46, G_49_48, P_49_48, G_51_50, P_51_50, G_53_52
      , P_53_52, G_55_54, P_55_54, G_57_56, P_57_56, G_59_58, P_59_58, G_61_60, P_61_60, G_63_62
      , P_63_62, G_65_64, P_65_64, G_67_66, P_67_66, G_69_68, P_69_68, G_71_70, P_71_70, G_73_72
      , P_73_72, G_75_74, P_75_74, G_77_76, P_77_76, G_79_78, P_79_78, G_81_80, P_81_80, G_83_82
      , P_83_82, G_85_84, P_85_84, G_87_86, P_87_86, G_89_88, P_89_88, G_91_90, P_91_90, G_93_92
      , P_93_92, G_95_94, P_95_94, G_97_96, P_97_96, G_99_98, P_99_98, G_101_100, P_101_100, G_103_102
      , P_103_102, G_105_104, P_105_104, G_107_106, P_107_106, G_109_108, P_109_108, G_111_110, P_111_110
      , G_113_112, P_113_112, G_115_114, P_115_114, G_117_116, P_117_116, G_119_118, P_119_118, G_121_120
      , P_121_120, G_123_122, P_123_122, G_125_124, P_125_124, G_127_126, P_127_126, G_3_0, G_7_4, P_7_4
      , G_11_8, P_11_8, G_15_12, P_15_12, G_19_16, P_19_16, G_23_20, P_23_20, G_27_24, P_27_24, G_31_28
      , P_31_28, G_35_32, P_35_32, G_39_36, P_39_36, G_43_40, P_43_40, G_47_44, P_47_44, G_51_48, P_51_48
      , G_55_52, P_55_52, G_59_56, P_59_56, G_63_60, P_63_60, G_67_64, P_67_64, G_71_68, P_71_68, G_75_72
      , P_75_72, G_79_76, P_79_76, G_83_80, P_83_80, G_87_84, P_87_84, G_91_88, P_91_88, G_95_92, P_95_92
      , G_99_96, P_99_96, G_103_100, P_103_100, G_107_104, P_107_104, G_111_108, P_111_108, G_115_112
      , P_115_112, G_119_116, P_119_116, G_123_120, P_123_120, G_127_124, P_127_124, G_5_0, G_7_0, G_13_8
      , P_13_8, G_15_8, P_15_8, G_21_16, P_21_16, G_23_16, P_23_16, G_29_24, P_29_24, G_31_24, P_31_24
      , G_37_32, P_37_32, G_39_32, P_39_32, G_45_40, P_45_40, G_47_40, P_47_40, G_53_48, P_53_48, G_55_48
      , P_55_48, G_61_56, P_61_56, G_63_56, P_63_56, G_69_64, P_69_64, G_71_64, P_71_64, G_77_72, P_77_72
      , G_79_72, P_79_72, G_85_80, P_85_80, G_87_80, P_87_80, G_93_88, P_93_88, G_95_88, P_95_88, G_101_96
      , P_101_96, G_103_96, P_103_96, G_109_104, P_109_104, G_111_104, P_111_104, G_117_112, P_117_112
      , G_119_112, P_119_112, G_125_120, P_125_120, G_127_120, P_127_120, G_9_0, G_11_0, G_13_0, G_15_0, G_25_16
      , P_25_16, G_27_16, P_27_16, G_29_16, P_29_16, G_31_16, P_31_16, G_41_32, P_41_32, G_43_32, P_43_32, G_45_32
      , P_45_32, G_47_32, P_47_32, G_57_48, P_57_48, G_59_48, P_59_48, G_61_48, P_61_48, G_63_48, P_63_48, G_73_64
      , P_73_64, G_75_64, P_75_64, G_77_64, P_77_64, G_79_64, P_79_64, G_89_80, P_89_80, G_91_80, P_91_80
      , G_93_80, P_93_80, G_95_80, P_95_80, G_105_96, P_105_96, G_107_96, P_107_96, G_109_96, P_109_96
      , G_111_96, P_111_96, G_121_112, P_121_112, G_123_112, P_123_112, G_125_112, P_125_112, G_127_112
      , P_127_112, G_17_0, G_19_0, G_21_0, G_23_0, G_25_0, G_27_0, G_29_0, G_31_0, G_49_32, P_49_32, G_51_32
      , P_51_32, G_53_32, P_53_32, G_55_32, P_55_32, G_57_32, P_57_32, G_59_32, P_59_32, G_61_32, P_61_32
      , G_63_32, P_63_32, G_81_64, P_81_64, G_83_64, P_83_64, G_85_64, P_85_64, G_87_64, P_87_64, G_89_64, P_89_64
      , G_91_64, P_91_64, G_93_64, P_93_64, G_95_64, P_95_64, G_113_96, P_113_96, G_115_96, P_115_96
      , G_117_96, P_117_96, G_119_96, P_119_96, G_121_96, P_121_96, G_123_96, P_123_96, G_125_96, P_125_96
      , G_127_96, P_127_96, G_33_0, G_35_0, G_37_0, G_39_0, G_41_0, G_43_0, G_45_0, G_47_0, G_49_0, G_51_0
      , G_53_0, G_55_0, G_57_0, G_59_0, G_61_0, G_63_0, G_97_64, P_97_64, G_99_64, P_99_64, G_101_64, P_101_64
      , G_103_64, P_103_64, G_105_64, P_105_64, G_107_64, P_107_64, G_109_64, P_109_64, G_111_64, P_111_64
      , G_113_64, P_113_64, G_115_64, P_115_64, G_117_64, P_117_64, G_119_64, P_119_64, G_121_64, P_121_64
      , G_123_64, P_123_64, G_125_64, P_125_64, G_127_64, P_127_64, G_65_0, G_67_0, G_69_0, G_71_0, G_73_0
      , G_75_0, G_77_0, G_79_0, G_81_0, G_83_0, G_85_0, G_87_0, G_89_0, G_91_0, G_93_0, G_95_0, G_97_0
      , G_99_0, G_101_0, G_103_0, G_105_0, G_107_0, G_109_0, G_111_0, G_113_0, G_115_0, G_117_0, G_119_0
      , G_121_0, G_123_0, G_125_0, G_127_0, G_2_0, G_4_0, G_6_0, G_8_0, G_10_0, G_12_0, G_14_0, G_16_0
      , G_18_0, G_20_0, G_22_0, G_24_0, G_26_0, G_28_0, G_30_0, G_32_0, G_34_0, G_36_0, G_38_0, G_40_0
      , G_42_0, G_44_0, G_46_0, G_48_0, G_50_0, G_52_0, G_54_0, G_56_0, G_58_0, G_60_0, G_62_0, G_64_0
      , G_66_0, G_68_0, G_70_0, G_72_0, G_74_0, G_76_0, G_78_0, G_80_0, G_82_0, G_84_0, G_86_0, G_88_0
      , G_90_0, G_92_0, G_94_0, G_96_0, G_98_0, G_100_0, G_102_0, G_104_0, G_106_0, G_108_0, G_110_0, G_112_0
      , G_114_0, G_116_0, G_118_0, G_120_0, G_122_0, G_124_0, G_126_0;
   // parallel-prefix, Ladner-Fischer
   // Stage 1: Generates G/P pairs that span 1 bits
--- a/wally-pipelined/src/fpu/ldf64.sv
+++ b/wally-pipelined/src/fpu/ldf64.sv
@ -29,6 +29,22 @@ module ladner_fischer64 (c, p, g);
   output [64:1] c;
   logic G_1_0,G_3_2,P_3_2,G_5_4,P_5_4,G_7_6,P_7_6,G_9_8,P_9_8,G_11_10,P_11_10,G_13_12,P_13_12,G_15_14,P_15_14
      ,G_17_16,P_17_16,G_19_18,P_19_18,G_21_20,P_21_20,G_23_22,P_23_22,G_25_24,P_25_24,G_27_26,P_27_26,G_29_28,P_29_28
      ,G_31_30,P_31_30,G_33_32,P_33_32,G_35_34,P_35_34,G_37_36,P_37_36,G_39_38,P_39_38,G_41_40,P_41_40,G_43_42,P_43_42
      ,G_45_44,P_45_44,G_47_46,P_47_46,G_49_48,P_49_48,G_51_50,P_51_50,G_53_52,P_53_52,G_55_54,P_55_54,G_57_56,P_57_56
      ,G_59_58,P_59_58,G_61_60,P_61_60,G_63_62,P_63_62,G_3_0,G_7_4,P_7_4,G_11_8,P_11_8,G_15_12,P_15_12,G_19_16,P_19_16
      ,G_23_20,P_23_20,G_27_24,P_27_24,G_31_28,P_31_28,G_35_32,P_35_32,G_39_36,P_39_36,G_43_40,P_43_40,G_47_44,P_47_44
      ,G_51_48,P_51_48,G_55_52,P_55_52,G_59_56,P_59_56,G_63_60,P_63_60,G_5_0,G_7_0,G_13_8,P_13_8,G_15_8,P_15_8,G_21_16
      ,P_21_16,G_23_16,P_23_16,G_29_24,P_29_24,G_31_24,P_31_24,G_37_32,P_37_32,G_39_32,P_39_32,G_45_40,P_45_40,G_47_40
      ,P_47_40,G_53_48,P_53_48,G_55_48,P_55_48,G_61_56,P_61_56,G_63_56,P_63_56,G_9_0,G_11_0,G_13_0,G_15_0,G_25_16
      ,P_25_16,G_27_16,P_27_16,G_29_16,P_29_16,G_31_16,P_31_16,G_41_32,P_41_32,G_43_32,P_43_32,G_45_32,P_45_32,G_47_32
      ,P_47_32,G_57_48,P_57_48,G_59_48,P_59_48,G_61_48,P_61_48,G_63_48,P_63_48,G_17_0,G_19_0,G_21_0,G_23_0,G_25_0,G_27_0
      ,G_29_0,G_31_0,G_49_32,P_49_32,G_51_32,P_51_32,G_53_32,P_53_32,G_55_32,P_55_32,G_57_32,P_57_32,G_59_32,P_59_32
      ,G_61_32,P_61_32,G_63_32,P_63_32,G_33_0,G_35_0,G_37_0,G_39_0,G_41_0,G_43_0,G_45_0,G_47_0,G_49_0,G_51_0,G_53_0
      ,G_55_0,G_57_0,G_59_0,G_61_0,G_63_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0,G_14_0,G_16_0,G_18_0,G_20_0,G_22_0
      ,G_24_0,G_26_0,G_28_0,G_30_0,G_32_0,G_34_0,G_36_0,G_38_0,G_40_0,G_42_0,G_44_0,G_46_0,G_48_0,G_50_0,G_52_0
      ,G_54_0,G_56_0,G_58_0,G_60_0,G_62_0;
   // parallel-prefix, Ladner-Fischer
   // Stage 1: Generates G/P pairs that span 1 bits
--- a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv
+++ b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv
@ -240,6 +240,7 @@ module multiplier( y, x, Sum, Carry );
   // Below are the nets for the partial products (booth)
   wire 	  pp_0_0;
   wire    pp_0_1;
   wire 	  pp_0_2;
   wire 	  pp_1_2;
   wire 	  pp_0_3;
--- a/wally-pipelined/src/fpu/multiply.sv
+++ b/wally-pipelined/src/fpu/multiply.sv
@ -16,17 +16,18 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
     wire [26:0][1:0] add1;
     wire [26:0][54:0] pp; 
     wire [26:0] e;
-     logic [17:0][105:0] lv1add;
+     logic [106:0] tmpsE;
-     logic [11:0][105:0] lv2add;
+     logic [17:0][106:0] lv1add;
-     logic [7:0][105:0] lv3add;
+     logic [11:0][106:0] lv2add;
-     logic [3:0][105:0] lv4add;
+     logic [7:0][106:0] lv3add;
-     logic [21:0][106:0] carryTmp;
+     logic [3:0][106:0] lv4add;
-     wire [26:0][105:0] acc; 
+     logic [21:0][107:0] carryTmp;
     wire [26:0][106:0] acc; 
     // wire [105:0] acc
    genvar i;	
-	assign xExt = {2'b0,~(xdenormE|xzeroE),xman};
+	assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
-	assign yExt = {2'b0,~(ydenormE|yzeroE),yman, 1'b0};
+	assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
     generate
        for(i=0; i<27; i=i+1) begin
@ -35,69 +36,70 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
     endgenerate
    assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; 
-    assign acc[1] = {50'b01,~e[1],pp[1],add1[0]}; 
+    assign acc[1] = {49'b01,~e[1],pp[1],add1[0]}; 
-    assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
+    assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
-    assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
+    assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
-    assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
+    assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
-    assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
+    assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
-    assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
+    assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
-    assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
+    assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
-    assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
+    assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
-    assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
+    assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
-    assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
+    assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
-    assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
+    assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
-    assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
+    assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
-    assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
+    assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
-    assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
+    assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
-    assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
+    assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
-    assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
+    assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
-    assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
+    assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
-    assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
+    assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
-    assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
+    assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
-    assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
+    assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
-    assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
+    assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
-    assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
+    assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
-    assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
+    assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
-    assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
+    assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
-    assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
+    assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
    assign acc[26] = {pp[26],add1[25], 50'b0};
    //*** resize adders
     generate
        for(i=0; i<9; i=i+1) begin
-            add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
+            add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
-                                           .carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
+                                           .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
-            assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
+            assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
        end
     endgenerate
     generate
        for(i=0; i<6; i=i+1) begin
-            add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
+            add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
-                                           .carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
+                                           .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
-            assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
+            assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
        end
     endgenerate
    generate
        for(i=0; i<4; i=i+1) begin
-            add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
+            add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
-                                            .carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
+                                            .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
-            assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
+            assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
        end
    endgenerate
    generate
        for(i=0; i<2; i=i+1) begin
-            add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
+            add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
                                            .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
-            assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
+            assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
        end
    endgenerate
-    add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
+    add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
-                                    .carry(carryTmp[21]), .sum(sE));
+                                    .carry(carryTmp[21]), .sum(tmpsE));
    assign sE = tmpsE[105:0];
    assign rE = {carryTmp[21][104:0], 1'b0};
 		// assign rE = 0;
 		// assign sE = acc[0] +
@ -131,3 +133,4 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
 			// assign sE = {53'b0,~(xdenormE|xzeroE),xman}  *  {53'b0,~(ydenormE|yzeroE),yman};
 			// assign rE = 0;
 endmodule
--- a/wally-pipelined/src/fpu/normalize.sv
+++ b/wally-pipelined/src/fpu/normalize.sv
@ -56,8 +56,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 	// The sticky bit calculation is actually built into the shifter and
 	// does not require a true subtraction shown in the model.
-	assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
+	assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
-	assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
+	// assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
 	always_comb
 		begin
 		// d = aligncntM
@ -65,19 +65,19 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 		// p = 53
 		// ea + eb = aeM
 			// set d<=2 to d<=0
-			if ($signed(aligncntM)<=$signed(2))  begin //d<=2 
+			if ($signed(aligncntM)<=$signed(13'd2))  begin //d<=2 
 				// product anchored or cancellation
-				if ($signed(aeM-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
+				if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
 					//normal result
-					de0 = xzeroM|yzeroM ? zexp : aeM-normcnt+xdenormM+ydenormM+57;
+					de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
 					resultdenorm = |sum & ~|de0 | de0[12];
 					// if z is zero then there was a 56 bit shift of the product
-					sumshifted = resultdenorm ? sum << sumshiftM-zzeroM+isShiftLeft1 : sum << normcnt; // p+2+l
+					sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
 					v = sumshifted[162:109];
 					sticky = (|sumshifted[108:0]) | bsM;
 					//de0 = aeM-normcnt+2-1023;
 				end else begin
-					sumshifted = sum << (1080+aeM);
+					sumshifted = sum << (13'd1080+aeM);
 					v = sumshifted[162:109];
 					sticky = (|sumshifted[108:0]) | bsM;
 					resultdenorm = 1;
@ -96,29 +96,29 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 				// the book says exp = zexp + {-1,0,1}
 				if(sumshiftzeroM) begin
 					v = sum[162:109];
-					sticky = sum[108:0] | bsM;
+					sticky = (|sum[108:0]) | bsM;
-					de0 = zexp;
+					de0 = {2'b0,zexp};
 				end else if(sumshifted[163] & ~sumshifttmp[9])begin
 					v = sumshifted[162:109];
 					sticky = (|sumshifted[108:0]) | bsM;
-					de0 = zexp +2;
+					de0 = {2'b0,zexp} +13'd2;
 				end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
 					v = sumshifted[161:108];
 					sticky = (|sumshifted[107:0]) | bsM;
-					de0 = zexp+1;
+					de0 = {2'b0,zexp}+13'd1;
 				end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
 					v = sumshifted[160:107];
 					sticky = (|sumshifted[106:0]) | bsM;
 					//de0 = zexp-1;
-					de0 = zexp+zdenormM;
+					de0 = {2'b0,zexp}+{12'b0,zdenormM};
 				end else if(sumshifted[160]& ~zdenormM) begin
-					de0 = zexp-1;
+					de0 = {2'b0,zexp}-13'b1;
 					v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
 					sticky = (|sumshifted[105:0]) | bsM;
 					//de0 = zexp-1;
 				end else if(sumshifted[159]& ~zdenormM) begin
 					//v = sumshifted[158:105];
-					de0 = zexp-2;
+					de0 = {2'b0,zexp}-13'd2;
 					v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
 					sticky = (|sumshifted[104:0]) | bsM;
 					//de0 = zexp-1;
@ -126,7 +126,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 					v = sumshifted[160:107];
 					sticky = (|sumshifted[106:0]) | bsM;
 					//de0 = zexp-1;
-					de0 = zexp;
+					de0 = {2'b0,zexp}; // zero-extend zexp as every other branch does; the old zexp[62] bit-select indexed outside the exponent field
 				end else begin
 					de0 = 0;
 					sumshifted = sum << sumshiftM-1; // p+2+l
@ -144,3 +144,4 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 endmodule
--- a/wally-pipelined/src/fpu/round.sv
+++ b/wally-pipelined/src/fpu/round.sv
@ -4,7 +4,7 @@
 // Date:		11/2/1995
 //
 // Block Description: 
-//   This block is responsible for rounding the normalized result of //   the FMAC.   Because prenormalized results may be bypassed back to //   the FMAC X and z inputs, rounding does not appear in the critical //   path of most floating point code.   This is good because rounding //   requires an entire 52 bit carry-propagate half-adder delay.
+//   This block is responsible for rounding the normalized result of //   the FMAC.   Because prenormalized results may be bypassed back to //   the FMAC X and z inputs, rounding does not appear in the critical //   path of most floating point code.   This is good because rounding //   requires an entire 52 bit carry-propagate half-adder delay.
 //
 //   The results from other FPU blocks (e.g. FCVT,  FDIV,  etc)  are also 
 //   muxed in to form the actual result for register file writeback.  This
@ -24,14 +24,14 @@ module round(v, sticky, FrmM, wsign,
 	input logic		[2:0]	FrmM;
 	input logic				wsign;		// Sign of result
 	input logic 		[4:0]	FmaFlagsM;
-	input logic				inf;		// Some input is infinity
+	input logic				inf;		// Some input is infinity
-	input logic				nanM;		// Some input is NaN
+	input logic				nanM;		// Some input is NaN
 	input logic				xnanM;		// X is NaN
 	input logic				ynanM;		// Y is NaN
 	input logic				znanM;		// Z is NaN
-	input logic		[51:0]		xman;		// Input X
+	input logic		[51:0]		xman;		// Input X
-	input logic		[51:0]		yman;		// Input Y
+	input logic		[51:0]		yman;		// Input Y
-	input logic		[51:0]		zman;		// Input Z
+	input logic		[51:0]		zman;		// Input Z
 	output logic		[51:0]		wman; 		// rounded result of FMAC
 	output logic				infinity;    	// Generate infinity on overflow
 	output logic				specialsel;  	// Select special result
@ -85,7 +85,7 @@ module round(v, sticky, FrmM, wsign,
 	// The special result mux is a 4:1 mux that should not appear in the
 	// critical path of the machine.   It is not priority encoded,  despite
 	// the code below suggesting otherwise.  Also,  several of the identical data
-	// inputs to the wide muxes can be combined at the expense of more
+	// inputs to the wide muxes can be combined at the expense of more
 	// complicated non-critical control in the circuit implementation.
 	assign specialsel =  FmaFlagsM[2] ||  FmaFlagsM[1] ||  FmaFlagsM[4] || //overflow underflow invalid
@ -102,15 +102,15 @@ module round(v, sticky, FrmM, wsign,
 	assign infinityres = infinity ? 52'b0 : {52{1'b1}};
 	// Invalid operations produce a quiet NaN. The result should
-	// propagate an input if the input is NaN. Since we assume all
+	// propagate an input if the input is NaN. Since we assume all
-	// NaN inputs are already quiet, we don't have to force them quiet.
+	// NaN inputs are already quiet, we don't have to force them quiet.
 	// assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original
 	// IEEE 754-2008 section 6.2.3 states:
-	// "If two or more inputs are NaN, then the payload of the resulting NaN should be 
+	// "If two or more inputs are NaN, then the payload of the resulting NaN should be 
-	// identical to the payload of one of the input NaNs if representable in the destination
+	// identical to the payload of one of the input NaNs if representable in the destination
-	// format. This standard does not specify which of the input NaNs will provide the payload."
+	// format. This standard does not specify which of the input NaNs will provide the payload."
 	assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
 	// Select result with 4:1 mux
--- a/wally-pipelined/src/fpu/rounder_denorm.sv
+++ b/wally-pipelined/src/fpu/rounder_denorm.sv
@ -238,7 +238,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
 					( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) 
 					: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) 
 				) : 
-			(op_type[3]) ? exp_A_unmodified : Rexp;
+			(op_type[3]) ? exp_A_unmodified[10:0] : Rexp; //KEP used to be all of exp_A_unmodified
   // If the result is zero or infinity, the mantissa is all zeros. 
   // If the result is NaN, the mantissa is 10...0
--- a/wally-pipelined/src/fpu/rounder_div.sv
+++ b/wally-pipelined/src/fpu/rounder_div.sv
@ -67,6 +67,7 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
   wire 	  sign_rem;
   wire [63:0] 	  q, qm, qp;
   wire 	  exp_ovf, exp_ovfSP, exp_ovfDP;
   logic zero_rem;   
   // Remainder = 0?
   assign zero_rem = ~(|regr_out);
@ -97,7 +98,7 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
   //   1.) we choose any qm0, qp0, q0 (since we shift mant)
   //   2.) we choose qp and we overflow (for RU)
   assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})};
-   assign Texp = exp_diff - {{13{vss}}, ~q1[63]} + {{13{vss}}, mux_mant[1]&qp1[63]&~exp_ovf};
+   assign Texp = exp_diff - {{12{vss}}, ~q1[63]} + {{12{vss}}, mux_mant[1]&qp1[63]&~exp_ovf}; // KEP used to be 13{vss}
   // Overflow only occurs for double precision, if Texp[10] to Texp[0] are 
   // all ones. To encourage sharing with single precision overflow detection,
--- a/wally-pipelined/src/fpu/sbtm2.sv
+++ b/wally-pipelined/src/fpu/sbtm2.sv
@ -13,6 +13,7 @@ module sbtm2 (input logic [11:0] a, output logic [10:0] y);
   logic [14:0] op1;
   logic [14:0] op2;
   logic [14:0] p; 
   logic cout;  
   assign x0 = a[11:7];
   assign x1 = a[6:4];
--- a/wally-pipelined/src/fpu/sign.sv
+++ b/wally-pipelined/src/fpu/sign.sv
@ -25,7 +25,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
 	input logic		[4:0]		FmaFlagsM;				// Round toward minus infinity
 	input logic					sumzero;		// Sum = O
 	input logic					zinfM;			// Y = Inf
-	input logic					inf;			// Some input = Inf
+	input logic					inf;			// Some input = Inf
 	output logic					wsign;			// Sign of W 
 	output logic					invz;			// Invert addend into adder
 	output logic					negsum;			// Negate result of adder
@ -36,6 +36,9 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
 	wire					zerosign;    	// sign if result= 0 
 	wire					sumneg;    	// sign if result= 0 
 	wire					infsign;     	// sign if result= Inf 
 logic tmp;
 	logic psign;
 	// Compute sign of product 
 	assign psign = xsign ^ ysign;
@ -55,7 +58,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
 	assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);
 	//always @(invz or negsum0 or negsum1 or bsM or ps)
 	//	begin
-	//		if (~invz) begin               // both inputs have same sign  
+	//		if (~invz) begin               // both inputs have same sign  
 	//			negsum = 0;
 	//			selsum1 = 0;
 	//		end else if (bsM) begin        // sticky bit set on addend
@ -80,7 +83,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
 	// Sign calculation is not in the critical path so the cases
 	// can be tolerated. 
 	// IEEE 754-2008 section 6.3 states 
-	// 		"When ether an input or result is NaN, this standard does not interpret the sign of a NaN."
+	// 		"When either an input or result is NaN, this standard does not interpret the sign of a NaN."
 	// 		also pertaining to negZero it states:
 	//			"When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
 	//			 shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero 
--- a/wally-pipelined/src/fpu/special.sv
+++ b/wally-pipelined/src/fpu/special.sv
@ -60,7 +60,7 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
 	// assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE;
 	// assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE;
 	// assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE;
-	// KATHERINE - removed denorm to prevent outputing zero when computing with a denormalized number
+	// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
 	assign xzeroE = ~(|ReadData1E[62:0]);
 	assign yzeroE = ~(|ReadData2E[62:0]);
 	assign zzeroE = ~(|ReadData3E[62:0]);
--- a/wally-pipelined/src/privileged/privdec.sv
+++ b/wally-pipelined/src/privileged/privdec.sv
@ -28,7 +28,7 @@
 module privdec (
  input  logic [31:20] InstrM,
-  input  logic         PrivilegedM, IllegalIEUInstrFaultM, IllegalCSRAccessM,
+  input  logic         PrivilegedM, IllegalIEUInstrFaultM, IllegalCSRAccessM, IllegalFPUInstrM,
  input  logic [1:0]   PrivilegeModeW, 
  input  logic         STATUS_TSR,
  output logic         IllegalInstrFaultM,
@ -47,7 +47,7 @@ module privdec (
  assign wfiM =       PrivilegedM & (InstrM[31:20] == 12'b000100000101);
  assign sfencevmaM = PrivilegedM & (InstrM[31:25] ==  7'b0001001);
  assign IllegalPrivilegedInstrM = PrivilegedM & ~(uretM|sretM|mretM|ecallM|ebreakM|wfiM|sfencevmaM);
-  assign IllegalInstrFaultM = IllegalIEUInstrFaultM | IllegalPrivilegedInstrM | IllegalCSRAccessM; // *** generalize this for other instructions
+  assign IllegalInstrFaultM = (IllegalIEUInstrFaultM & IllegalFPUInstrM) | IllegalPrivilegedInstrM | IllegalCSRAccessM; // fault only when BOTH the IEU and FPU decoders reject the instruction; OR-ing IllegalFPUInstrM in again would absorb the AND term. *** generalize this for other instructions
  // *** initially, wfi and sfencevma are nop
  // *** zfenci extension?
--- a/wally-pipelined/src/privileged/privileged.sv
+++ b/wally-pipelined/src/privileged/privileged.sv
@ -46,7 +46,7 @@ module privileged (
  input  logic             PrivilegedM,
  input  logic             ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM,
  input  logic             WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM,
-  input  logic             InstrMisalignedFaultM, IllegalIEUInstrFaultD,
+  input  logic             InstrMisalignedFaultM, IllegalIEUInstrFaultD, IllegalFPUInstrD,
  input  logic             LoadMisalignedFaultM,
  input  logic             StoreMisalignedFaultM,
  input  logic             TimerIntM, ExtIntM, SwIntM,
@ -78,6 +78,7 @@ module privileged (
  logic uretM, sretM, mretM, ecallM, ebreakM, wfiM, sfencevmaM;
  logic IllegalCSRAccessM;
  logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
  logic IllegalFPUInstrE, IllegalFPUInstrM;
  logic LoadPageFaultM, StorePageFaultM; 
  logic InstrPageFaultF, InstrPageFaultD, InstrPageFaultE, InstrPageFaultM;
  logic InstrAccessFaultF, InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
@ -158,12 +159,12 @@ module privileged (
  flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD,
                  {InstrPageFaultF, InstrAccessFaultF},
                  {InstrPageFaultD, InstrAccessFaultD});
-  flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE,
+  flopenrc #(4) faultregE(clk, reset, FlushE, ~StallE,
-                  {IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
+                  {IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD, IllegalFPUInstrD}, // ** vs IllegalInstrFaultInD
-                  {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE});
+                  {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE});
-  flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM,
+  flopenrc #(4) faultregM(clk, reset, FlushM, ~StallM,
-                  {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE},
+                  {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE},
-                  {IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM});
+                  {IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM, IllegalFPUInstrM});
  trap trap(.*);
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@ -96,7 +96,7 @@ module wallypipelinedhart (
  logic       SquashSCW;
  logic [31:0]      FSROutW;
  logic             DivSqrtDoneE;
-  logic             FInvalInstrD;
+  logic             IllegalFPUInstrD;
  logic [`XLEN-1:0] FPUResultW;
  // memory management unit signals
@ -174,7 +174,7 @@ module wallypipelinedhart (
  privileged priv(.*);
-  // fpu fpu(.*); // floating point unit
+   fpu fpu(.*); // floating point unit
  // add FPU here, with SetFflagsM, FRM_REGW
  // presently stub out SetFlagsM and FloatRegWriteW
  //assign SetFflagsM = 0;
--- a/wally-pipelined/testbench/testbench-busybear.sv
+++ b/wally-pipelined/testbench/testbench-busybear.sv
@ -416,18 +416,6 @@ module testbench();
  `CHECK_CSR2(STVAL, `CSRS)
  `CHECK_CSR(STVEC)
              //$stop;
  generate 
    if (`BUSYBEAR == 1) begin
      initial begin //this is temporary until the bug can be fixed!!!
        #11130100;
      force dut.hart.ieu.dp.regf.rf[5] = 64'h0000000080000004;
      #100;
      release dut.hart.ieu.dp.regf.rf[5];
      end
    end 
  endgenerate
  logic speculative;
  initial begin
    speculative = 0;
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@ -52,7 +52,71 @@ module testbench();
  string tests64f[] = '{
    "rv64f/I-FADD-S-01", "2000",
-    "rv64f/I-FCLASS-S-01", "2000"
+    "rv64f/I-FCLASS-S-01", "2000",
    "rv64f/I-FCVT-S-L-01", "2000",
    "rv64f/I-FCVT-S-LU-01", "2000",
    "rv64f/I-FCVT-S-W-01", "2000",
    "rv64f/I-FCVT-S-WU-01", "2000",
    "rv64f/I-FCVT-L-S-01", "2000",
    "rv64f/I-FCVT-LU-S-01", "2000",
    "rv64f/I-FCVT-W-S-01", "2000",
    "rv64f/I-FCVT-WU-S-01", "2000",
    "rv64f/I-FDIV-S-01", "2000",
    "rv64f/I-FEQ-S-01", "2000",
    "rv64f/I-FLE-S-01", "2000",
    "rv64f/I-FLT-S-01", "2000",
    "rv64f/I-FMADD-S-01", "2000",
    "rv64f/I-FMAX-S-01", "2000",
    "rv64f/I-FMIN-S-01", "2000",
    "rv64f/I-FMSUB-S-01", "2000",
    "rv64f/I-FMUL-S-01", "2000",
    "rv64f/I-FMV-W-X-01", "2000",
    "rv64f/I-FMV-X-W-01", "2000",
    "rv64f/I-FNMADD-S-01", "2000",
    "rv64f/I-FNMSUB-S-01", "2000",
    "rv64f/I-FSGNJ-S-01", "2000",
    "rv64f/I-FSGNJN-S-01", "2000",
    "rv64f/I-FSGNJX-S-01", "2000",
    "rv64f/I-FSQRT-S-01", "2000",
    "rv64f/I-FSW-01", "2000",
    "rv64f/I-FLW-01", "2110",
    "rv64f/I-FSUB-S-01", "2000"
  };
  // rv64d (double-precision) test list. Entries come in pairs: the test path
  // followed by a string constant ("2000", "2420", ...).
  // NOTE(review): the second string is presumably the signature/data offset
  // used by the testbench loader -- confirm against the code that consumes
  // this array.
  // Names follow the same pattern as the rv64f list (I-<OP>-<fmt>-01, and
  // I-FLD-01 / I-FSD-01 for the load/store tests).
  string tests64d[] = '{
    "rv64d/I-FADD-D-01", "2000",
    "rv64d/I-FCLASS-D-01", "2000",
    "rv64d/I-FCVT-D-L-01", "2000",
    "rv64d/I-FCVT-D-LU-01", "2000",
    "rv64d/I-FCVT-D-S-01", "2000",
    "rv64d/I-FCVT-D-W-01", "2000",
    "rv64d/I-FCVT-D-WU-01", "2000",
    "rv64d/I-FCVT-L-D-01", "2000",
    "rv64d/I-FCVT-LU-D-01", "2000",
    "rv64d/I-FCVT-S-D-01", "2000",
    "rv64d/I-FCVT-W-D-01", "2000",
    "rv64d/I-FCVT-WU-D-01", "2000",
    "rv64d/I-FDIV-D-01", "2000",
    "rv64d/I-FEQ-D-01", "2000",
    "rv64d/I-FLD-01", "2420",
    "rv64d/I-FLE-D-01", "2000",
    "rv64d/I-FLT-D-01", "2000",
    "rv64d/I-FMADD-D-01", "2000",
    "rv64d/I-FMAX-D-01", "2000",
    "rv64d/I-FMIN-D-01", "2000",
    "rv64d/I-FMSUB-D-01", "2000",
    "rv64d/I-FMUL-D-01", "2000",
    "rv64d/I-FMV-D-X-01", "2000",
    "rv64d/I-FMV-X-D-01", "2000",
    "rv64d/I-FNMADD-D-01", "2000",
    "rv64d/I-FNMSUB-D-01", "2000",
    "rv64d/I-FSD-01", "2000",
    "rv64d/I-FSGNJ-D-01", "2000",
    "rv64d/I-FSGNJN-D-01", "2000",
    "rv64d/I-FSGNJX-D-01", "2000",
    "rv64d/I-FSQRT-D-01", "2000",
    "rv64d/I-FSUB-D-01", "2000"
  };
  string tests64a[] = '{
@ -259,6 +323,40 @@ module testbench();
    "rv32i/I-MISALIGN_JMP-01","2000"
  };
 // rv32f (single-precision) test list; entries are (test path, string constant) pairs.
 // NOTE(review): the I-FCVT-S-L / -S-LU / -L-S / -LU-S entries name conversions
 // to/from 64-bit integers, which the RISC-V F extension defines only for RV64 --
 // confirm these tests exist under rv32f and are meant to run here.
 string tests32f[] = '{
    "rv32f/I-FADD-S-01", "2000",
    "rv32f/I-FCLASS-S-01", "2000",
    "rv32f/I-FCVT-S-L-01", "2000",
    "rv32f/I-FCVT-S-LU-01", "2000",
    "rv32f/I-FCVT-S-W-01", "2000",
    "rv32f/I-FCVT-S-WU-01", "2000",
    "rv32f/I-FCVT-L-S-01", "2000",
    "rv32f/I-FCVT-LU-S-01", "2000",
    "rv32f/I-FCVT-W-S-01", "2000",
    "rv32f/I-FCVT-WU-S-01", "2000",
    "rv32f/I-FDIV-S-01", "2000",
    "rv32f/I-FEQ-S-01", "2000",
    "rv32f/I-FLE-S-01", "2000",
    "rv32f/I-FLT-S-01", "2000",
    "rv32f/I-FMADD-S-01", "2000",
    "rv32f/I-FMAX-S-01", "2000",
    "rv32f/I-FMIN-S-01", "2000",
    "rv32f/I-FMSUB-S-01", "2000",
    "rv32f/I-FMUL-S-01", "2000",
    "rv32f/I-FMV-W-X-01", "2000",
    "rv32f/I-FMV-X-W-01", "2000",
    "rv32f/I-FNMADD-S-01", "2000",
    "rv32f/I-FNMSUB-S-01", "2000",
    "rv32f/I-FSGNJ-S-01", "2000",
    "rv32f/I-FSGNJN-S-01", "2000",
    "rv32f/I-FSGNJX-S-01", "2000",
    "rv32f/I-FSQRT-S-01", "2000",
    "rv32f/I-FSW-01", "2000",
    "rv32f/I-FLW-01", "2110",
    "rv32f/I-FSUB-S-01", "2000"
  };
  string tests32i[] = {
    "rv32i/I-ADD-01", "2000",
    "rv32i/I-ADDI-01","2000",
@ -617,11 +715,13 @@ module instrNameDecTB(
  logic [2:0] funct3;
  logic [6:0] funct7;
  logic [11:0] imm;
  logic [4:0] rs2;
  assign op = instr[6:0];
  assign funct3 = instr[14:12];
  assign funct7 = instr[31:25];
  assign imm = instr[31:20];
  assign rs2 = instr[24:20];
  // it would be nice to add the operands to the name 
  // create another variable called decoded
@ -745,6 +845,67 @@ module instrNameDecTB(
                       else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D";
                       else                              name = "ILLEGAL";
      10'b0001111_???: name = "FENCE";
      // ---- Floating-point instruction names -------------------------------
      // Case items are {opcode[6:0], funct3[2:0]}; the first matching item wins,
      // so the funct3-specific OP-FP items must precede the 1010011_??? catch-all.
      // Fused multiply-add family: one major opcode each, funct3 is the rounding mode.
      10'b1000011_???: name = "FMADD";
      10'b1000111_???: name = "FMSUB";
      10'b1001011_???: name = "FNMSUB";
      10'b1001111_???: name = "FNMADD";
      // OP-FP (1010011), funct3 = 000
      10'b1010011_000: if      (funct7[6:2] == 5'b00000) name = "FADD";
                       else if (funct7[6:2] == 5'b00001) name = "FSUB";
                       else if (funct7[6:2] == 5'b00010) name = "FMUL";
                       else if (funct7[6:2] == 5'b00011) name = "FDIV";
                       else if (funct7[6:2] == 5'b01011) name = "FSQRT";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
                       else if (funct7 == 7'b1110000 && rs2 == 5'b00000) name = "FMV.X.W";
                       else if (funct7 == 7'b1111000 && rs2 == 5'b00000) name = "FMV.W.X";
                       else if (funct7 == 7'b1110001 && rs2 == 5'b00000) name = "FMV.X.D"; // DOUBLE (fmt = 01)
                       else if (funct7 == 7'b1111001 && rs2 == 5'b00000) name = "FMV.D.X"; // DOUBLE (fmt = 01)
                       else if (funct7[6:2] == 5'b00100) name = "FSGNJ";
                       else if (funct7[6:2] == 5'b00101) name = "FMIN";
                       else if (funct7[6:2] == 5'b10100) name = "FLE";
                       else                              name = "ILLEGAL";
      // OP-FP (1010011), funct3 = 001
      10'b1010011_001: if      (funct7[6:2] == 5'b00000) name = "FADD";
                       else if (funct7[6:2] == 5'b00001) name = "FSUB";
                       else if (funct7[6:2] == 5'b00010) name = "FMUL";
                       else if (funct7[6:2] == 5'b00011) name = "FDIV";
                       else if (funct7[6:2] == 5'b01011) name = "FSQRT";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
                       else if (funct7[6:2] == 5'b00100) name = "FSGNJN";
                       else if (funct7[6:2] == 5'b00101) name = "FMAX";
                       else if (funct7[6:2] == 5'b10100) name = "FLT";
                       else if (funct7[6:2] == 5'b11100) name = "FCLASS";
                       else                              name = "ILLEGAL";
      // OP-FP (1010011), funct3 = 010. This item previously used opcode 0101111,
      // so FSGNJX/FEQ fell through to the catch-all below and decoded as ILLEGAL.
      10'b1010011_010: if      (funct7[6:2] == 5'b00000) name = "FADD";
                       else if (funct7[6:2] == 5'b00001) name = "FSUB";
                       else if (funct7[6:2] == 5'b00010) name = "FMUL";
                       else if (funct7[6:2] == 5'b00011) name = "FDIV";
                       else if (funct7[6:2] == 5'b01011) name = "FSQRT";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
                       else if (funct7[6:2] == 5'b00100) name = "FSGNJX";
                       else if (funct7[6:2] == 5'b10100) name = "FEQ";
                       else                              name = "ILLEGAL";
      // OP-FP catch-all for the remaining funct3 (rounding-mode) encodings
      10'b1010011_???: if      (funct7[6:2] == 5'b00000) name = "FADD";
                       else if (funct7[6:2] == 5'b00001) name = "FSUB";
                       else if (funct7[6:2] == 5'b00010) name = "FMUL";
                       else if (funct7[6:2] == 5'b00011) name = "FDIV";
                       else if (funct7[6:2] == 5'b01011) name = "FSQRT";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
                       else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
                       else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
                       else                              name = "ILLEGAL";
      // FP loads/stores: funct3 is the access width (010 = word, 011 = doubleword).
      // FLD/FSD previously repeated the 010 patterns and could never match.
      10'b0000111_010: name = "FLW";
      10'b0100111_010: name = "FSW";
      10'b0000111_011: name = "FLD";
      10'b0100111_011: name = "FSD";
      default:         name = "ILLEGAL";
    endcase
 endmodule
`@ -1 +1 @@`
	`testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat`	`testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat`