Merge branch 'main' into cache

Conflicts: wally-pipelined/src/cache/dmapped.sv wally-pipelined/src/cache/line.sv wally-pipelined/src/ifu/icache.sv
2025-02-11 06:05:49 +00:00 · 2021-04-14 18:24:32 -04:00 · 2021-04-14 18:24:32 -04:00 · c1e2e58ebe
commit c1e2e58ebe
parent 4ae1df1290 8f7ddcfdff
42 changed files with 136624 additions and 307799 deletions
--- a/wally-pipelined/misc/tlb_toy/tlb_testbench.sv
+++ b/wally-pipelined/misc/tlb_toy/tlb_testbench.sv
--- a/wally-pipelined/misc/tlb_toy/tlb_toy.sv.OLD
+++ b/wally-pipelined/misc/tlb_toy/tlb_toy.sv.OLD
--- a/wally-pipelined/misc/tlb_toy/tlb_toy.tv
+++ b/wally-pipelined/misc/tlb_toy/tlb_toy.tv
--- a/wally-pipelined/src/cache/dmapped.sv
+++ b/wally-pipelined/src/cache/dmapped.sv
@ -75,14 +75,14 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par

    // Assign the read and write addresses in cache memory
    always_comb begin
-        assign ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
-        assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
-        assign ReadSet = ReadPAdr[SETEND:SETBEGIN];
-        assign ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];
+        ReadOffset = OldReadPAdr[OFFSETEND:OFFSETBEGIN];
+        ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
+        ReadSet = ReadPAdr[SETEND:SETBEGIN];
+        ReadTag = OldReadPAdr[TAGEND:TAGBEGIN];

-        assign WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
-        assign WriteSet = WritePAdr[SETEND:SETBEGIN];
-        assign WriteTag = WritePAdr[TAGEND:TAGBEGIN];
+        WriteOffset = WritePAdr[OFFSETEND:OFFSETBEGIN];
+        WriteSet = WritePAdr[SETEND:SETBEGIN];
+        WriteTag = WritePAdr[TAGEND:TAGBEGIN];
    end

    // Depth is number of bits in one "word" of the memory, width is number of such words
--- a/wally-pipelined/src/ebu/pagetablewalker.sv
+++ b/wally-pipelined/src/ebu/pagetablewalker.sv
@ -61,7 +61,7 @@ module pagetablewalker (
  output logic             MMUTranslationComplete,

  // Faults
-  output logic             InstrPageFaultM, LoadPageFaultM, StorePageFaultM
+  output logic             InstrPageFaultF, LoadPageFaultM, StorePageFaultM
 );

  // Internal signals
@ -85,7 +85,7 @@ module pagetablewalker (

  // Signals for direct, fake translations. Not part of the final Wally version.
  logic [`XLEN-1:0]     DirectInstrPTE, DirectMemPTE;
-  logic [9:0]           DirectPTEFlags = {2'b0, 8'b00001111};
+  localparam            DirectPTEFlags = {2'b0, 8'b00001111};

  logic [`VPN_BITS-1:0] PCPageNumber, MemAdrPageNumber;

@ -133,17 +133,22 @@ module pagetablewalker (
  assign PageTypeF = PageType;
  assign PageTypeM = PageType;

+  localparam IDLE = 3'h0;
+  localparam LEVEL1 = 3'h1;
+  localparam LEVEL0 = 3'h2;
+  localparam LEAF = 3'h3;
+  localparam FAULT = 3'h4;
+
+  logic [2:0] WalkerState, NextWalkerState;
+
  generate
    if (`XLEN == 32) begin
      logic [9:0] VPN1, VPN0;

      assign SvMode = SATP_REGW[31];

-      typedef enum {IDLE, LEVEL1, LEVEL0, LEAF, FAULT} walker_statetype;
-      walker_statetype WalkerState, NextWalkerState;
-
      // *** Do we need a synchronizer here for walker to talk to ahblite?
-      flopenl #(.TYPE(walker_statetype)) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
+      flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);

      // State transition logic
      always_comb begin
@ -154,7 +159,8 @@ module pagetablewalker (
                //  else if (~ValidPTE || (LeafPTE && BadMegapage))
                //                                   NextWalkerState = FAULT;
                // *** Leave megapage implementation for later
-                //  else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;
+                // *** need to check if megapage valid/aligned
+                  else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;
                  else if (ValidPTE && ~LeafPTE)   NextWalkerState = LEVEL0;
                  else                             NextWalkerState = FAULT;
          LEVEL0: if      (~MMUReady)              NextWalkerState = LEVEL0;
@ -165,6 +171,8 @@ module pagetablewalker (
                  else                             NextWalkerState = IDLE;
          FAULT:  if      (MMUTranslate)           NextWalkerState = LEVEL1;
                  else                             NextWalkerState = IDLE;
+          // Default case should never happen, but is included for linter.
+          default:                                 NextWalkerState = IDLE;
        endcase
      end

@ -179,38 +187,41 @@ module pagetablewalker (
      // Assign combinational outputs
      always_comb begin
        // default values
-        assign TranslationPAdr = '0;
-        assign PageTableEntry = '0;
-        assign PageType ='0;
-        assign MMUTranslationComplete = '0;
-        assign DTLBWriteM = '0;
-        assign ITLBWriteF = '0;
-        assign InstrPageFaultM = '0;
-        assign LoadPageFaultM = '0;
-        assign StorePageFaultM = '0;
+        TranslationPAdr = '0;
+        PageTableEntry = '0;
+        PageType ='0;
+        MMUTranslationComplete = '0;
+        DTLBWriteM = '0;
+        ITLBWriteF = '0;
+        InstrPageFaultF = '0;
+        LoadPageFaultM = '0;
+        StorePageFaultM = '0;

        case (NextWalkerState)
          LEVEL1: begin
-            assign TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
+            TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
          end
          LEVEL0: begin
-            assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
+            TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
          end
          LEAF: begin
            // Keep physical address alive to prevent HADDR dropping to 0
-            assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
-            assign PageTableEntry = CurrentPTE;
-            assign PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
-            assign MMUTranslationComplete = '1;
-            assign DTLBWriteM = DTLBMissM;
-            assign ITLBWriteF = ~DTLBMissM;  // Prefer data over instructions
+            TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
+            PageTableEntry = CurrentPTE;
+            PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
+            MMUTranslationComplete = '1;
+            DTLBWriteM = DTLBMissM;
+            ITLBWriteF = ~DTLBMissM;  // Prefer data over instructions
          end
          FAULT: begin
-            assign TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
-            assign MMUTranslationComplete = '1;
-            assign InstrPageFaultM = ~DTLBMissM;
-            assign LoadPageFaultM = DTLBMissM && ~MemStore;
-            assign StorePageFaultM = DTLBMissM && MemStore;
+            TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
+            MMUTranslationComplete = '1;
+            InstrPageFaultF = ~DTLBMissM;
+            LoadPageFaultM = DTLBMissM && ~MemStore;
+            StorePageFaultM = DTLBMissM && MemStore;
+          end
+          default: begin
+            // nothing
          end
        endcase
      end
@ -226,30 +237,30 @@ module pagetablewalker (
      assign MMUPAdr = TranslationPAdr[31:0];

    end else begin
+      localparam LEVEL2 = 3'h5;
+
      assign SvMode = SATP_REGW[63];

      logic [8:0] VPN2, VPN1, VPN0;

      logic GigapageMisaligned, BadGigapage;

-      typedef enum {IDLE, LEVEL2, LEVEL1, LEVEL0, LEAF, FAULT} walker_statetype;
-      walker_statetype WalkerState, NextWalkerState;
-
      // *** Do we need a synchronizer here for walker to talk to ahblite?
-      flopenl #(.TYPE(walker_statetype)) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
+      flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);

      always_comb begin
        case (WalkerState)
          IDLE:   if      (MMUTranslate)           NextWalkerState = LEVEL2;
                  else                             NextWalkerState = IDLE;
          LEVEL2: if      (~MMUReady)              NextWalkerState = LEVEL2;
+                  else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;
                  else if (ValidPTE && ~LeafPTE)   NextWalkerState = LEVEL1;
                  else                             NextWalkerState = FAULT;
          LEVEL1: if      (~MMUReady)              NextWalkerState = LEVEL1;
                //  else if (~ValidPTE || (LeafPTE && BadMegapage))
                //                                   NextWalkerState = FAULT;
                // *** Leave megapage implementation for later
-                //  else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;
+                  else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;
                  else if (ValidPTE && ~LeafPTE)   NextWalkerState = LEVEL0;
                  else                             NextWalkerState = FAULT;
          LEVEL0: if      (~MMUReady)              NextWalkerState = LEVEL0;
@ -260,6 +271,8 @@ module pagetablewalker (
                  else                             NextWalkerState = IDLE;
          FAULT:  if      (MMUTranslate)           NextWalkerState = LEVEL2;
                  else                             NextWalkerState = IDLE;
+          // Default case should never happen, but is included for linter.
+          default:                                 NextWalkerState = IDLE;
        endcase
      end

@ -279,42 +292,45 @@ module pagetablewalker (
      // *** Should translate this flop block into our flop module notation
      always_comb begin
        // default values
-        assign TranslationPAdr = '0;
-        assign PageTableEntry = '0;
-        assign PageType = '0;
-        assign MMUTranslationComplete = '0;
-        assign DTLBWriteM = '0;
-        assign ITLBWriteF = '0;
-        assign InstrPageFaultM = '0;
-        assign LoadPageFaultM = '0;
-        assign StorePageFaultM = '0;
+        TranslationPAdr = '0;
+        PageTableEntry = '0;
+        PageType = '0;
+        MMUTranslationComplete = '0;
+        DTLBWriteM = '0;
+        ITLBWriteF = '0;
+        InstrPageFaultF = '0;
+        LoadPageFaultM = '0;
+        StorePageFaultM = '0;

        case (NextWalkerState)
          LEVEL2: begin
-            assign TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
+            TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
          end
          LEVEL1: begin
-            assign TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
+            TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
          end
          LEVEL0: begin
-            assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
+            TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
          end
          LEAF: begin
            // Keep physical address alive to prevent HADDR dropping to 0
-            assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
-            assign PageTableEntry = CurrentPTE;
-            assign PageType = (WalkerState == LEVEL2) ? 2'b11 : 
+            TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
+            PageTableEntry = CurrentPTE;
+            PageType = (WalkerState == LEVEL2) ? 2'b11 : 
                                ((WalkerState == LEVEL1) ? 2'b01 : 2'b00);
-            assign MMUTranslationComplete = '1;
-            assign DTLBWriteM = DTLBMissM;
-            assign ITLBWriteF = ~DTLBMissM;  // Prefer data over instructions
+            MMUTranslationComplete = '1;
+            DTLBWriteM = DTLBMissM;
+            ITLBWriteF = ~DTLBMissM;  // Prefer data over instructions
          end
          FAULT: begin
-            assign TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
-            assign MMUTranslationComplete = '1;
-            assign InstrPageFaultM = ~DTLBMissM;
-            assign LoadPageFaultM = DTLBMissM && ~MemStore;
-            assign StorePageFaultM = DTLBMissM && MemStore;
+            TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
+            MMUTranslationComplete = '1;
+            InstrPageFaultF = ~DTLBMissM;
+            LoadPageFaultM = DTLBMissM && ~MemStore;
+            StorePageFaultM = DTLBMissM && MemStore;
+          end
+          default: begin
+            // nothing
          end
        endcase
      end
@ -331,4 +347,4 @@ module pagetablewalker (
    end
  endgenerate

-endmodule
+endmodule
--- a/wally-pipelined/src/fpu/FMA/add.sv
+++ b/wally-pipelined/src/fpu/FMA/add.sv
@ -48,7 +48,7 @@ module add(r, s, t, sum,

 	// Compound adder
 	// Consists of 3:2 CSA followed by long compound CPA
-	assign prodshifted = killprod ? 0 : {56'b0, r2, 2'b0} + {56'b0, s2, 2'b0};
+	assign prodshifted = killprod ? 0 : {56'b0, r2+s2, 2'b0};
 	assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
 	assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above
 	
--- a/wally-pipelined/src/fpu/FMA/align.sv
+++ b/wally-pipelined/src/fpu/FMA/align.sv
@ -56,7 +56,7 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
 	// addend on right shifts.  Handle special cases of shifting
 	// by too much.

-	always @(aligncnt or zman or zdenorm)
+	always @(aligncnt or xzero or yzero or zman or zdenorm or zzero)
 		begin

 		// Default to clearing sticky bits 
@ -67,26 +67,23 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
 		killprod = xzero | yzero;
 		// d = aligncnt
 		// p = 53
-		if ($signed(aligncnt) <= $signed(-103)) begin //d<=-2p+1
+		if ($signed(aligncnt) <= $signed(-105)) begin //d<=-2p+1
 			//product anchored case with saturated shift
 			sumshift = 163;	// 3p+4	
 			sumshiftzero = 0;
-			shift = {~zdenorm,zman,163'b0} >> sumshift;
+			shift = {1'b1,zman,163'b0} >> sumshift;
 			t = zzero ? 0 : {shift[215:52]};
 			bs = |(shift[51:0]);
 			//zexpsel = 0;
-		end else if($signed(aligncnt) <= $signed(1))  begin // -2p+1<d<=2
-			// set d<=2 to d<=0
+		end else if($signed(aligncnt) <= $signed(2))  begin // -2p+1<d<=2
 			// product anchored or cancellation
-			// warning: set to 55 rather then 56. was there a typo in the book?
-			sumshift = 57-aligncnt; // p + 3 - d  
+			sumshift = 57-aligncnt; // p + 2 - d  
 			sumshiftzero = 0;
 			shift = {~zdenorm,zman,163'b0} >> sumshift;
 			t = zzero ? 0 : {shift[215:52]};
 			bs = |(shift[51:0]);
 			//zexpsel = 0;
 		end else if ($signed(aligncnt)<=$signed(55))  begin // 2 < d <= p+2
-			// another typo in book? above was 55 changed to 52
 			// addend anchored case
 			// used to be 56 \/ somthing doesn't seem right too many typos
 			sumshift = 57-aligncnt;
--- a/wally-pipelined/src/fpu/FMA/booth.sv
+++ b/wally-pipelined/src/fpu/FMA/booth.sv
@ -0,0 +1,55 @@
+module booth(xExt, choose, add1, e, pp); 
+// Partial-product selector: the 3-bit window `choose` picks 0, +x, +2x, -2x or -x of the multiplicand (tables below).
+    // Negative multiples are built from the bitwise inverse of xExt; add1 supplies the matching +1/+2 correction term.
+	input 		[53:0]		xExt;				// multiplicand	xExt
+	input		[2:0]		choose;				// bits needed to choose which encoding
+	output		[1:0]       	add1;				// correction value paired with pp: 0, 1, or 2 (2'b10 only for -2x)
+    output                  e;                      // 1 for the negative encodings; equals choose[2] in every case below
+	output		[54:0]		pp;				//	the resultant encoding
+    
+    logic [54:0] pp, temp;   // NOTE(review): temp is declared but never used in this module
+    logic e;
+    logic [1:0] add1;
+    logic [53:0] negx;       // bitwise inverse of xExt, source for the negative multiples
+    //logic temp;
+
+    assign negx = ~xExt;
+    // Partial-product table: one 55-bit value per encoding.
+    always @(choose, xExt, negx)
+    case (choose)
+        3'b000 : pp = 55'b0;   //  0
+        3'b001 : pp = {1'b0, xExt};  //  +x, zero-extended to 55 bits
+        3'b010 : pp = {1'b0, xExt};  //  +x, zero-extended to 55 bits
+        3'b011 : pp = {xExt, 1'b0};  //  +2x (x shifted left by one)
+        3'b100 : pp = {negx, 1'b0};  // -2x: (~x) shifted left by one; add1 = 2 completes the negation
+        3'b101 : pp = {1'b1, negx};  // -x: ~{1'b0, x}; add1 = 1 completes the negation
+        3'b110 : pp = {1'b1, negx};  // -x: ~{1'b0, x}; add1 = 1 completes the negation
+        3'b111 : pp = 55'hfffffffffffffff;  //  -0: all ones, add1 = 1. NOTE(review): 15 hex digits (60 bits) in a 55-bit literal; relies on truncation
+    endcase
+    // e table: 0 for the non-negative encodings, 1 for the negative ones.
+    always @(choose, xExt, negx)
+    case (choose)
+        3'b000 : e = 0;   //  0
+        3'b001 : e = 0;  //  1
+        3'b010 : e = 0;  //  1
+        3'b011 : e = 0;  //  2
+        3'b100 : e = 1;  // -2
+        3'b101 : e = 1;  // -1
+        3'b110 : e = 1;  // -1
+        3'b111 : e = 1;  //  -0
+    endcase
+    // assign add1 = (choose[2] == 1'b1) ? ((choose[1:0] == 2'b11) ? 1'b0 : 1'b1) : 1'b0;
+    // assign add1 = choose[2];
+    always @(choose)   // add1 table: correction that accompanies each pp encoding
+    case (choose)
+        3'b000 : add1 = 2'b0;   //  0
+        3'b001 : add1 = 2'b0;  //  1
+        3'b010 : add1 = 2'b0;  //  1
+        3'b011 : add1 = 2'b0;  //  2
+        3'b100 : add1 = 2'b10;  // -2: pp holds (~x)<<1, so +2 is needed
+        3'b101 : add1 = 2'b1;  // -1
+        3'b110 : add1 = 2'b1;  // -1
+        3'b111 : add1 = 2'b1;  //  -0: all-ones pp plus 1 wraps to zero
+    endcase
+
+endmodule
--- a/wally-pipelined/src/fpu/FMA/compressors.sv
+++ b/wally-pipelined/src/fpu/FMA/compressors.sv
@ -0,0 +1,90 @@
+module add3comp2(a, b, c, carry, sum); 
+// BITS-wide 3:2 compressor: one sng3comp2 per bit position, with no connection between positions.
+//look into different implementations of the compressors?
+    // carry[i] is emitted at the same index as a[i]/b[i]/c[i]; this module applies no shift to it.
+    parameter BITS = 4;
+	input 		[BITS-1:0]		a;
+	input		[BITS-1:0]		b;
+	input		[BITS-1:0]    	c;
+    output      [BITS-1:0]      carry;
+	output		[BITS-1:0]		sum;
+    genvar i;
+    // Replicate the single-bit compressor across all BITS positions.
+    generate
+        for(i= 0; i<BITS; i=i+1) begin
+            sng3comp2 add0(a[i], b[i], c[i], carry[i], sum[i]);
+        end
+    endgenerate
+
+endmodule
+
+module add4comp2(a, b, c, d, carry, sum); 
+// BITS-wide 4:2 compressor: a chain of sng4comp2 cells where cout of bit i-1 feeds cin of bit i.
+    // NOTE(review): appears to require BITS >= 2, since cout[BITS-2] is referenced and bit 0 / bit BITS-1 have separate cells.
+    parameter BITS = 4;
+	input 		[BITS-1:0]		a;
+	input		[BITS-1:0]		b;
+	input		[BITS-1:0]    	c;
+	input		[BITS-1:0]    	d;
+    output      [BITS:0]      carry;   // one bit wider than the inputs
+	output		[BITS-1:0]		sum;
+
+    logic       [BITS-1:0]      cout;       // per-bit intermediate carry passed to the next cell's cin
+    logic                       carryTmp;   // carry output of the top cell, combined with cout[BITS-1] below
+    genvar i;
+
+    // Bit 0: cin is tied to 0.
+    sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);
+    // Bits 1 .. BITS-2: cin comes from the previous cell's cout.
+    generate
+        for(i= 1; i<BITS-1; i=i+1) begin
+            sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
+        end
+    endgenerate
+
+    // Top bit: its carry goes to carryTmp rather than directly to carry[BITS-1].
+    sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);
+    // NOTE(review): AND lands in the lower index and XOR in the higher, the reverse of a conventional half adder - confirm intended.
+    assign carry[BITS-1] = carryTmp & cout[BITS-1];
+    assign carry[BITS] = carryTmp ^ cout[BITS-1];
+
+endmodule
+
+module sng3comp2(a, b, c, carry, sum); 
+// Single-bit 3:2 compressor: sum is the XOR of the three inputs, carry is 1 when at least two inputs are 1.
+//look into different implementations of the compressors?
+    
+	input 				a;
+	input				b;
+	input		       	c;
+    output              carry;
+	output				sum;
+    
+    logic               axorb;   // a ^ b, shared by the sum and carry expressions
+
+    assign axorb = a ^ b;
+    assign sum = axorb ^ c;
+    // If a and b differ, the carry follows c; if they are equal, the carry equals a (and b).
+    assign carry = axorb ? c : a;
+
+endmodule
+
+module sng4comp2(a, b, c, d, cin, cout, carry, sum); 
+// Single-bit 4:2 compressor built from two chained sng3comp2 cells.
+//look into pass gate 4:2 counters?
+    // cout depends only on a, b, c (first cell); cin enters only the second cell.
+	input 				a;
+	input				b;
+	input		       	c;
+    input               d;
+    input               cin;
+    output              cout;
+    output              carry;
+	output				sum;
+    
+    logic               TmpSum;   // sum output of the first cell, fed to the second
+    // First cell: a, b, c bind implicitly via .*; second cell: carry and sum bind implicitly via .*.
+    sng3comp2 add1(.carry(cout), .sum(TmpSum),.*);
+    sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .*);
+
+endmodule
--- a/wally-pipelined/src/fpu/FMA/expgen.sv
+++ b/wally-pipelined/src/fpu/FMA/expgen.sv
@ -17,7 +17,7 @@
 /////////////////////////////////////////////////////////////////////////////
 module expgen(xexp, yexp, zexp,
 			   killprod,  sumzero, resultdenorm, normcnt, infinity, 
-			   invalid, overflow, underflow, inf, xzero, yzero,expplus1,
+			   FmaFlagsM, inf, xzero, yzero,expplus1,
 			   nan, de0, xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, specialsel, zexpsel,
 			   aligncnt, wexp,
 			   prodof, sumof, sumuf, denorm0, ae);
@ -31,9 +31,7 @@ module expgen(xexp, yexp, zexp,
 	input     			resultdenorm;  // postnormalize rounded result
 	input     	[8:0]  		normcnt;     	// normalization shift count 
 	input     			infinity;    	// generate infinity on overflow 
-	input     			invalid;     	// Result invalid
-	input     			overflow;    	// Result overflowed
-	input     			underflow;   	// Result underflowed 
+	input     	[4:0]	FmaFlagsM;     	// FMA status flags; [4] invalid, [2] overflow, [1] underflow are used below
 	input     			inf;			// Some input is infinity
 	input     			nan;			// Some input is NaN
 	input     	[12:0]		de0;			// X is NaN NaN
@ -121,10 +119,10 @@ module expgen(xexp, yexp, zexp,
 	// produces either infinity or the largest finite number, depending on the
 	// rounding mode.  NaNs are propagated or generated.

-	assign specialres = invalid | nan ? nanres : // KEP added nan
-					overflow ? infinityres : 
+	assign specialres = FmaFlagsM[4] | nan ? nanres : // invalid
+					FmaFlagsM[2] ? infinityres : 	//overflow
 					inf ? 11'b11111111111 :
-					underflow ? 11'b0 : 11'bx;
+					FmaFlagsM[1] ? 11'b0 : 11'bx; //underflow

 	assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

--- a/wally-pipelined/src/fpu/FMA/flag.sv
+++ b/wally-pipelined/src/fpu/FMA/flag.sv
@ -10,12 +10,13 @@
 /////////////////////////////////////////////////////////////////////////////
 module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 			 psign,  zsign, xzero, yzero, zzero, vbits, killprod,
-			 inf, nan, invalid, overflow, underflow, inexact);
+			 inf, nan, FmaFlagsM,sticky);
 /////////////////////////////////////////////////////////////////////////////

 	input                  		xnan;        	// X is NaN 
 	input                  		ynan;        	// Y is NaN 
 	input                 		znan;       	// Z is NaN 
+	input                  		sticky;        	// sticky bit; contributes to the inexact flag FmaFlagsM[0]
 	input                  		xinf;        	// X is Inf
 	input                 		yinf;       	// Y is Inf 
 	input                  		zinf;        	// Z is Inf
@ -31,10 +32,7 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
 	input     	[1:0]  		vbits;		// R and S bits of result
 	output				inf;		// Some	source is Inf
 	output				nan;		// Some	source is NaN
-	output				invalid;	// Result is invalid	
-	output				overflow;	// Result overflowed	
-	output				underflow;	// Result underflowed	
-	output				inexact;	// Result is not an exact number
+	output		[4:0]	FmaFlagsM;
 
 	//   Internal nodes

@ -55,33 +53,36 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,

 	assign prodinf = prodof && ~xnan && ~ynan;
 	//KEP added if the product is infinity then sum is infinity
-	assign suminf = prodinf | sumof && ~xnan && ~ynan && ~znan;
+	assign suminf = sumof && ~xnan && ~ynan && ~znan;

 	// Set invalid flag for following cases:
 	//   1) Inf - Inf
 	//   2) 0 * Inf
 	//   3) Output = NaN (this is not part of the IEEE spec,  only 486 proj)

-	assign invalid = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
+	assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
 					   xzero && yinf || yzero && xinf;// KEP remove case 3) above

+	assign FmaFlagsM[3] = 0; // divide by zero flag
+
+
 	// Set the overflow flag for the following cases:
 	//   1) Rounded multiply result would be out of bounds
 	//   2) Rounded add result would be out of bounds

-	assign overflow = suminf && ~inf;
+	assign FmaFlagsM[2] = suminf && ~inf;

 	// Set the underflow  flag for the following cases:
 	//   1) Any input is denormalized
 	//   2)  Output would be denormalized or smaller

-	assign underflow = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
+	assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));

 	// Set the inexact flag for the following cases:
 	//   1) Multiplication inexact
 	//   2) Addition  inexact
 	// One of these cases occurred if the R or S bit is set

-	assign inexact = (vbits[0] || vbits[1]  || suminf) && ~(inf || nan);
+	assign FmaFlagsM[0] = (vbits[0] || vbits[1] ||sticky  || suminf) && ~(inf || nan);

 endmodule
--- a/wally-pipelined/src/fpu/FMA/fmac.sv
+++ b/wally-pipelined/src/fpu/FMA/fmac.sv
@ -15,13 +15,13 @@
 //    normalize Normalization shifter
 //    round     Rounding of result
 //    exception Handles exceptional cases
-//    bypass    Handles bypass of result to X or Z inputs
+//    bypass    Handles bypass of result to ReadData1E or ReadData3E inputs
 //    sign      One bit sign handling block 
 //    special   Catch special cases (inputs = 0  / infinity /  etc.) 
 //
-//   The FMAC computes W=X*Y+Z, rounded with the mode specified by
+//   The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
 //   RN, RZ, RM, or RP.  The result is optionally bypassed back to
-//   the X or Z inputs for use on the next cycle.  In addition,  four signals
+//   the ReadData1E or ReadData3E inputs for use on the next cycle.  In addition,  four signals
 //   are produced: trap, overflow, underflow, and inexact.  Trap indicates
 //   an infinity, NaN, or denormalized number to be handled in software;
 //   the other three signals are IEEE flags.
@ -29,29 +29,17 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module fmac(x, y, z, rn, rz, rp, rm,
-			earlyres, earlyressel, bypsel, bypplus1, byppostnorm, 
-			w, wbypass, invalid, overflow, underflow, inexact);
+module fma(ReadData1E, ReadData2E, ReadData3E, FrmE,
+			FmaResultM, FmaFlagsM, aligncnt);
 /////////////////////////////////////////////////////////////////////////////
 
-	input 		[63:0]		x;			// input X from reg file
-	input		[63:0]		y;				// input Y  
-	input 		[63:0]		z;          	// input Z from reg file 
-	input 			 		rn;          	// Round to Nearest
-	input 					rz;           	// Round toward zero
-	input 					rm;          	// Round toward minus infinity
-	input 					rp;          	// Round toward plus infinity
-	input 		[63:0]		earlyres;    	// Early result from other FP logic
-	input 					earlyressel;	// Select early result, not W 
-	input 		[1:0]		bypsel;     	// Select W bypass to X, or z 
-	input 					bypplus1;    	// Add one in bypass
-	input 					byppostnorm;	// postnormalize in bypass
-	output 		[63:0]		w;           	// output W=X*Y+Z
-	output 		[63:0]		wbypass;     	// prerounded output W=X*Y+Z for bypass
-	output 					invalid;    	// Result is invalid 
-	output					overflow;		// Result overflowed 
-	output					underflow;   	// Result underflowed
-	output 					inexact;     	// Result is not an exact number 
+	input 		[63:0]		ReadData1E;		// input 1
+	input		[63:0]		ReadData2E;     // input 2 
+	input 		[63:0]		ReadData3E;     // input 3
+	input 		[2:0]	 	FrmE;          	// Rounding mode
+	output 		[63:0]		FmaResultM;     // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
+	output 		[4:0]		FmaFlagsM;    	// status flags
+	output 		[12:0]		aligncnt;    	// shift count for alignment

 // Internal nodes
 
@ -60,12 +48,12 @@ module fmac(x, y, z, rn, rz, rp, rm,
 	logic 		[163:0]		t;				// output of alignment shifter
 	logic 		[163:0]		sum;			// output of carry prop adder
 	logic 		[53:0]		v; 				// normalized sum, R, S bits
-	logic 		[12:0]		aligncnt; 		// shift count for alignment
+//	logic 		[12:0]		aligncnt; 		// shift count for alignment
 	logic 		[8:0]		normcnt; 		// shift count for normalizer
 	logic 		[12:0]		ae; 		// multiplier expoent
 	logic 					bs;				// sticky bit of addend
 	logic 					ps;				// sticky bit of product
-	logic 					killprod; 		// Z >> product
+	logic 					killprod; 		// ReadData3E >> product
 	logic 					negsum; 		// negate sum
 	logic 					invz; 			// invert addend
 	logic 					selsum1; 		// select +1 mode of sum
@ -73,7 +61,7 @@ module fmac(x, y, z, rn, rz, rp, rm,
 	logic 					negsum1; 		// sum +1 < 0
 	logic 					sumzero; 		// sum = 0
 	logic 					infinity; 		// generate infinity on overflow
-	logic 					prodof; 		// X*Y out of range
+	logic 					prodof; 		// ReadData1E*ReadData2E out of range
 	logic 					sumof;			// result out of range
 	logic					xzero;
 	logic					yzero;
@ -101,6 +89,9 @@ module fmac(x, y, z, rn, rz, rp, rm,
 	logic			[8:0]		sumshift;
 	logic					sumshiftzero;
 	logic			[12:0]		de0;
+	logic					isAdd;
+
+	assign isAdd = 1;



@ -117,16 +108,16 @@ module fmac(x, y, z, rn, rz, rp, rm,

 //   Instantiate fraction datapath

-	multiply		multiply(.xman(x[51:0]), .yman(y[51:0]), .*);
-	align			align(.zman(z[51:0]),.*);
+	multiply		multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
+	align			align(.zman(ReadData3E[51:0]),.*);
 	add				add(.*);
 	lza				lza(.*);
-	normalize		normalize(.zexp(z[62:52]),.*); 
-	round			round(.xman(x[51:0]), .yman(y[51:0]),.zman(z[51:0]), .wman(w[51:0]),.wsign(w[63]),.*);
+	normalize		normalize(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*); 
+	round			round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]),.zman(ReadData3E[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);

 // Instantiate exponent datapath

-	expgen			expgen(.xexp(x[62:52]),.yexp(y[62:52]),.zexp(z[62:52]),.wexp(w[62:52]),.*);
+	expgen			expgen(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.wexp(FmaResultM[62:52]),.*);
 // Instantiate special case detection across datapath & exponent path 

 	special			special(.*);
@ -134,8 +125,8 @@ module fmac(x, y, z, rn, rz, rp, rm,

 // Instantiate control logic
 
-sign				sign(.xsign(x[63]),.ysign(y[63]),.zsign(z[63]),.wsign(w[63]),.*); 
-flag				flag(.zsign(z[63]),.vbits(v[1:0]),.*); 
+sign				sign(.xsign(ReadData1E[63]),.ysign(ReadData2E[63]),.zsign(ReadData3E[63]),.wsign(FmaResultM[63]),.*); 
+flag				flag(.zsign(ReadData3E[63]),.vbits(v[1:0]),.*); 

 endmodule

--- a/wally-pipelined/src/fpu/FMA/lza.sv
+++ b/wally-pipelined/src/fpu/FMA/lza.sv
@ -30,7 +30,7 @@ module lza(sum, normcnt, sumzero);
 	always @ ( sum)
 		begin
 			i =   0;
-			while (~sum[108-i] && i < 108) i = i+1;  // search for leading one 
+			while (~sum[163-i] && i <= 163) i = i+1;  // search for leading one 
 			normcnt = i;    // compute shift count
 	end

--- a/wally-pipelined/src/fpu/FMA/multiply.sv
+++ b/wally-pipelined/src/fpu/FMA/multiply.sv
@ -10,8 +10,124 @@ module multiply(xman, yman, xdenorm, ydenorm, xzero, yzero, r, s);
 	input     			yzero;		// Z is denorm
 	output		[105:0]		r;				//	partial product 1	
 	output		[105:0]		s;				//	partial product 2	
+    
+     wire        [54:0]      yExt; // y mantissa with its hidden bit (0 when y is zero/denormal) prepended and a 0 appended below the LSB
+     wire        [53:0]      xExt; // x mantissa with its hidden bit (0 when x is zero/denormal) prepended
+     wire [26:0][1:0] add1;
+     wire [26:0][54:0] pp; 
+     wire [26:0] e;
+     logic [17:0][105:0] lv1add;
+     logic [11:0][105:0] lv2add;
+     logic [7:0][105:0] lv3add;
+     logic [3:0][105:0] lv4add;
+     logic [21:0][106:0] carryTmp;
+     wire [26:0][105:0] acc; 
+     // wire [105:0] acc
+    genvar i;	

-	assign r = 106'b0;
-	assign s = {53'b0,~(xdenorm|xzero),xman}  *  {53'b0,~(ydenorm|yzero),yman};
+	assign xExt = {1'b0,~(xdenorm|xzero),xman};	// 54 bits: zero pad, hidden bit (0 for zero/denormal x), 52-bit mantissa; pad sized to match xExt exactly
+	assign yExt = {1'b0,~(ydenorm|yzero),yman, 1'b0};	// 55 bits: same layout for y, plus the 0 below the LSB that the first booth window yExt[2:0] reads
+    
+     generate
+        for(i=0; i<27; i=i+1) begin
+            booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
+        end
+     endgenerate

+    assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; 
+    assign acc[1] = {50'b01,~e[1],pp[1],add1[0]}; 
+    assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
+    assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
+    assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
+    assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
+    assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
+    assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
+    assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
+    assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
+    assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
+    assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
+    assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
+    assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
+    assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
+    assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
+    assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
+    assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
+    assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
+    assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
+    assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
+    assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
+    assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
+    assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
+    assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
+    assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
+    assign acc[26] = {pp[26],add1[25], 50'b0};
+
+    //*** resize adders
+     generate
+        for(i=0; i<9; i=i+1) begin
+            add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), 
+                                           .carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
+            assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
+        end
+     endgenerate
+
+     generate
+        for(i=0; i<6; i=i+1) begin
+            add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), 
+                                           .carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
+            assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
+        end
+     endgenerate
+
+    generate
+        for(i=0; i<4; i=i+1) begin
+            add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), 
+                                            .carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
+            assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
+        end
+    endgenerate
+
+
+    generate
+        for(i=0; i<2; i=i+1) begin
+            add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
+                                            .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
+            assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
+        end
+    endgenerate
+
+    add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
+                                    .carry(carryTmp[21]), .sum(s));
+    assign r = {carryTmp[21][104:0], 1'b0};
+		// assign r = 0;
+		// assign s = acc[0] +
+		// 		   acc[1] +
+		// 		   acc[2] +
+		// 		   acc[3] +
+		// 		   acc[4] +
+		// 		   acc[5] +
+		// 		   acc[6] +
+		// 		   acc[7] +
+		// 		   acc[8] +
+		// 		   acc[9] +
+		// 		   acc[10] +
+		// 		   acc[11] +
+		// 		   acc[12] +
+		// 		   acc[13] +
+		// 		   acc[14] +
+		// 		   acc[15] +
+		// 		   acc[16] +
+		// 		   acc[17] +
+		// 		   acc[18] +
+		// 		   acc[19] +
+		// 		   acc[20] +
+		// 		   acc[21] +
+		// 		   acc[22] +
+		// 		   acc[23] +
+		// 		   acc[24] +
+		// 		   acc[25] +
+		// 		   acc[26];
+
+			// assign s = {53'b0,~(xdenorm|xzero),xman}  *  {53'b0,~(ydenorm|yzero),yman};
+			// assign r = 0;
 endmodule
--- a/wally-pipelined/src/fpu/FMA/normalize.sv
+++ b/wally-pipelined/src/fpu/FMA/normalize.sv
@ -14,9 +14,11 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v); 
+module normalize(sum, xexp, yexp, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, zzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v); 
 /////////////////////////////////////////////////////////////////////////////
 	input     	[163:0]  	sum;            // sum
+	input     	[62:52]  	xexp;            // exponent field of X (bits 62:52 of the X operand)
+	input     	[62:52]  	yexp;            // exponent field of Y (bits 62:52 of the Y operand)
 	input     	[62:52]  	zexp;            // exponent field of Z (bits 62:52 of the Z operand)
 	input		[8:0] 		normcnt;     	// normalization shift count
 	input		[12:0] 		ae;     	// normalization shift count
@ -33,6 +35,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
 	input                  		zdenorm;        // Input Z is denormalized
 	input				xzero;
 	input				yzero;
+	input				zzero;		// Input Z is zero
 	output				sticky;		//sticky bit
 	output		[12:0]		de0;		// exponent computed for the shifted sum (derived from ae or zexp) - NOTE(review): confirm bias/format
 	output                  	resultdenorm;        // Result is denormalized (set from de0 being zero or having bit 12 set)
@ -47,6 +50,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
 	logic		[9:0]		sumshifttmp;
 	logic       	[163:0]  	sumshiftedtmp;     // shifted sum
 	logic 				sticky;
+	logic				isShiftLeft1;
 logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;

 	// When the sum is zero,  normalization does not apply and only the
@ -60,21 +64,23 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 	// The sticky bit calculation is actually built into the shifter and
 	// does not require a true subtraction shown in the model.
 
+	assign isShiftLeft1 = (aligncnt == 1 ||aligncnt == 0 || $signed(aligncnt) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
 	assign tmp = ($signed(ae-normcnt+2) >= $signed(-1022));
-	always @(sum or sumshift or ae or aligncnt or normcnt or bs or zexp or zdenorm)
+	always @(sum or sumshift or ae or aligncnt or normcnt or bs or isShiftLeft1 or zexp or zdenorm)
 		begin
 		// d = aligncnt
 		// l = normcnt
 		// p = 53
 		// ea + eb = ae
 			// set d<=2 to d<=0
-			if ($signed(aligncnt)<=$signed(1))  begin //d<=2 
+			if ($signed(aligncnt)<=$signed(2))  begin //d<=2 
 				// product anchored or cancellation
 				if ($signed(ae-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
 					//normal result
-					de0 = xzero|yzero ? zexp : ae-normcnt+2+xdenorm+ydenorm;
-					resultdenorm = |sum & ~|de0;
-					sumshifted = resultdenorm ? sum << sumshift : sum << (55+normcnt); // p+2+l
+					de0 = xzero|yzero ? zexp : ae-normcnt+xdenorm+ydenorm+57;
+					resultdenorm = |sum & ~|de0 | de0[12];
+					// if z is zero then there was a 56 bit shift of the product
+					sumshifted = resultdenorm ? sum << sumshift-zzero+isShiftLeft1 : sum << normcnt; // p+2+l
 					v = sumshifted[162:109];
 					sticky = (|sumshifted[108:0]) | bs;
 					//de0 = ae-normcnt+2-1023;
@ -90,8 +96,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 				sumshifttmp = {1'b0,sumshift} - 2;
 				sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
 				tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
-				tmp2 = (sumshifttmp[9] || sumshifted[162]);
-				tmp3 = sumshifted[161];
+				tmp2 = ((sumshifttmp[9] & sumshift[0]) || sumshifted[162]);
+				tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshift[1]));
 				tmp4 = sumshifted[160];
 				tmp5 = sumshifted[159];
 				// for some reason use exp = zexp + {0,1,2}
@ -112,25 +118,31 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
 					v = sumshifted[160:107];
 					sticky = (|sumshifted[106:0]) | bs;
 					//de0 = zexp-1;
-					de0 = zexp;
-				end else if(sumshifted[160]) begin
-					v = sumshifted[159:106];
+					de0 = zexp+zdenorm;
+				end else if(sumshifted[160]& ~zdenorm) begin
+					de0 = zexp-1;
+					v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
 					sticky = (|sumshifted[105:0]) | bs;
 					//de0 = zexp-1;
-					de0 = zexp-1;
-				end else if(sumshifted[159]) begin
-					v = sumshifted[158:105];
+				end else if(sumshifted[159]& ~zdenorm) begin
+					//v = sumshifted[158:105];
+					de0 = zexp-2;
+					v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
 					sticky = (|sumshifted[104:0]) | bs;
 					//de0 = zexp-1;
-					de0 = zexp-2;
-				end else begin					
+				end else if(zdenorm) begin					
 					v = sumshifted[160:107];
 					sticky = (|sumshifted[106:0]) | bs;
 					//de0 = zexp-1;
 					de0 = zexp;
+				end else begin
+					de0 = 0;
+					sumshifted = sum << sumshift-1; // p+2+l
+					v = sumshifted[162:109];
+					sticky = (|sumshifted[108:0]) | bs;
 				end

-				resultdenorm = ~(|de0);
+				resultdenorm = (~|de0 | de0[12]);
 		end 
 	end

--- a/wally-pipelined/src/fpu/FMA/round.sv
+++ b/wally-pipelined/src/fpu/FMA/round.sv
@ -13,22 +13,17 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module round(v, sticky, rz, rn, rp, rm, wsign,
-			  invalid, overflow, underflow, inf, nan, xnan, ynan, znan, 
+module round(v, sticky, FrmE, wsign,
+			  FmaFlagsM, inf, nan, xnan, ynan, znan, 
 			  xman, yman, zman,
 			  wman, infinity, specialsel,expplus1);
 /////////////////////////////////////////////////////////////////////////////

 	input		[53:0]		v;		// normalized sum, R, S bits
 	input				sticky;		//sticky bit
-	input				rz;		// Round toward zero
-	input				rn;		// Round toward	nearest
-	input				rp;		// Round toward	plus infinity
-	input				rm;		// Round toward	minus infinity
+	input		[2:0]	FrmE;
 	input				wsign;		// Sign of result
-	input 				invalid;	// Trap on infinity, NaN, denorm
-	input				overflow;	// Result overflowed
-	input				underflow;	// Result underflowed
+	input 		[4:0]	FmaFlagsM;
 	input				inf;		// Some input is infinity
 	input				nan;		// Some input is NaN
 	input				xnan;		// X is NaN
@ -45,7 +40,7 @@ module round(v, sticky, rz, rn, rp, rm, wsign,

 	// Internal nodes

-	wire				plus1;		// Round by adding one 
+	logic				plus1;		// Round by adding one 
 	wire		[52:0]		v1;		// Result + 1 (for rounding)
 	wire		[51:0]		specialres;	// Result of exceptional case 
 	wire		[51:0]		infinityres;	// Infinity or largest real number
@ -62,9 +57,19 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
 	//	0xx - do nothing
 	//	100 - tie - plus1 if v[2] = 1
 	//	101/110/111 - plus1
-	assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
-		       (rp & ~wsign) |
-		       (rm & wsign);
+	always @ (FrmE, v, wsign, sticky) begin	// plus1: increment the magnitude; v[2] = result LSB, v[1] = round bit, v[0]|sticky = any lower bit set
+		case (FrmE)
+			3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
+			3'b001: plus1 = 0;//round to zero
+			3'b010: plus1 = wsign & (v[1] | v[0] | sticky);//round down: grow magnitude only for inexact negative results
+			3'b011: plus1 = ~wsign & (v[1] | v[0] | sticky);//round up: grow magnitude only for inexact positive results
+			3'b100: plus1 = v[1];//round to nearest max magnitude: ties go away from zero for either sign
+			default: plus1 = 1'bx;
+		endcase
+	end
+	// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
+	// 	       (rp & ~wsign) |
+	// 	       (rm & wsign);
 	//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
 	//				 rp && ~wsign && (v[1] || v[0]) ||
 	//				 rm && wsign && (v[1] || v[0]);
@ -84,17 +89,17 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
 	// inputs to the wide muxes can be combined at the expense of more
 	// complicated non-critical control in the circuit implementation.

-	assign specialsel =  overflow || underflow || invalid ||
+	assign specialsel =  FmaFlagsM[2] ||  FmaFlagsM[1] ||  FmaFlagsM[4] || //overflow underflow invalid
 							nan || inf;
-	assign specialres = invalid | nan ? nanres : //KEP added nan
-						 overflow ? infinityres : 
+	assign specialres = FmaFlagsM[4] | nan ? nanres : //invalid
+						 FmaFlagsM[2] ? infinityres : //overflow
 						 inf ? 52'b0 :
-						underflow ? 52'b0 : 52'bx;  // default to undefined 
+						 FmaFlagsM[1] ? 52'b0 : 52'bx;  // underflow

 	// Overflow is handled differently for different rounding modes
 	// Round is to either infinity or to maximum finite number

-	assign infinity = rn || (rp && ~wsign) || (rm && wsign);
+	assign infinity = (FrmE == 3'b000) || (FrmE == 3'b100) || (FrmE == 3'b011 && ~wsign) || (FrmE == 3'b010 && wsign); // overflow gives infinity for the nearest modes and for a directed mode pointing away from zero; otherwise the largest finite value
 	assign infinityres = infinity ? 52'b0 : {52{1'b1}};

 	// Invalid operations produce a quiet NaN. The result should
--- a/wally-pipelined/src/fpu/FMA/sign.sv
+++ b/wally-pipelined/src/fpu/FMA/sign.sv
@ -10,23 +10,24 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm, overflow,
-			 sumzero, nan, invalid, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign);
+module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, FrmE, FmaFlagsM, zzero,
+			 sumzero, nan, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign, isAdd);
 ////////////////////////////////////////////////////////////////////////////I
 
 	input					xsign;			// Sign of X 
 	input					ysign;			// Sign of Y 
 	input					zsign;			// Sign of Z
+	input					zzero;
+	input					isAdd;
 	input					negsum0;		// Sum in +O mode is negative 
 	input					negsum1;		// Sum in +1 mode is negative 
 	input					bs;				// sticky bit from addend
 	input					ps;				// sticky bit from product
 	input					killprod;		// Product forced to zero
-	input					rm;				// Round toward minus infinity
-	input					overflow;				// Round toward minus infinity
+	input		[2:0]		FrmE;				// Rounding mode; compared against 3'b010 when choosing the sign of an exact-zero result
+	input		[4:0]		FmaFlagsM;				// Flag bits; [4] forces wsign to 0, [1] makes zerosign follow psign (presumably invalid and underflow - confirm)
 	input					sumzero;		// Sum = O
 	input					nan;			// Some input is NaN
-	input					invalid;		// Result invalid
 	input					xinf;			// X = Inf
 	input					yinf;			// Y = Inf
 	input					zinf;			// Y = Inf
@ -96,10 +97,24 @@ logic tmp;
 	//			 shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero 
 	//			 sum/difference shall be -0.  However, x+x = x-(-X) retains the same sign as x even when x is zero."
 
-	assign zerosign = (~invz && killprod) ? zsign : rm;
+	//assign zerosign = (~invz && killprod) ? zsign : rm;//***look into
+//	assign zerosign = (~invz && killprod) ? zsign : 0;
+	// zero sign
+	//	if product underflows then use psign
+	//	otherwise
+	//		addition
+	//			if cancelation then 0 unless round to -inf
+	//			otherwise psign
+	//		subtraction
+	//			if cancelation then 0 unless round to -inf
+	//			otherwise psign
+
+	assign zerosign = FmaFlagsM[1] ? psign :
+			  (isAdd ? (psign^zsign ? FrmE == 3'b010 : psign) :
+				  (psign^zsign ? psign : FrmE == 3'b010));
 	assign infsign = zinf ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
 	//assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
-	assign tmp = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
-	assign wsign = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
+	assign tmp = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
+	assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));

 endmodule
--- a/wally-pipelined/src/fpu/FMA/special.sv
+++ b/wally-pipelined/src/fpu/FMA/special.sv
@ -10,49 +10,49 @@
 /////////////////////////////////////////////////////////////////////////////

 /////////////////////////////////////////////////////////////////////////////
-module special(x, y, z, ae, xzero, yzero, zzero,
+module special(ReadData1E, ReadData2E, ReadData3E, ae, xzero, yzero, zzero,
 				xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, xinf, yinf, zinf);
 /////////////////////////////////////////////////////////////////////////////

-	input   	[63:0]     	x;              // Input x
-	input     	[63:0]     	y;           	// Input Y
-	input      	[63:0]    	z;            	// Input z 
+	input   	[63:0]     	ReadData1E;              // Input ReadData1E
+	input     	[63:0]     	ReadData2E;           	// Input ReadData2E
+	input      	[63:0]    	ReadData3E;            	// Input ReadData3E 
 	input		[12:0]		ae;		// exponent of product
-	output				xzero;		// Input x = 0
-	output				yzero;		// Input y = 0
-	output				zzero;		// Input z = 0
-	output				xnan;		// x is NaN
-	output				ynan;		// y is NaN
-	output				znan;		// z is NaN
-	output				xdenorm;	// x is denormalized
-	output				ydenorm;	// y is denormalized
-	output				zdenorm;	// z is denormalized
+	output				xzero;		// Input ReadData1E = 0
+	output				yzero;		// Input ReadData2E = 0
+	output				zzero;		// Input ReadData3E = 0
+	output				xnan;		// ReadData1E is NaN
+	output				ynan;		// ReadData2E is NaN
+	output				znan;		// ReadData3E is NaN
+	output				xdenorm;	// ReadData1E is denormalized
+	output				ydenorm;	// ReadData2E is denormalized
+	output				zdenorm;	// ReadData3E is denormalized
 	output				proddenorm;	// product is denormalized
-	output				xinf;		// x is infinity
-	output				yinf;		// y is infinity
-	output				zinf;		// z is infinity
+	output				xinf;		// ReadData1E is infinity
+	output				yinf;		// ReadData2E is infinity
+	output				zinf;		// ReadData3E is infinity

 	// In the actual circuit design, the gates looking at bits
 	// 51:0 and at bits 62:52 should be shared among the various detectors.

 	// Check if input is NaN

-	assign xnan = &x[62:52] && |x[51:0]; 
-	assign ynan = &y[62:52] && |y[51:0]; 
-	assign znan = &z[62:52] && |z[51:0];
+	assign xnan = &ReadData1E[62:52] && |ReadData1E[51:0]; 
+	assign ynan = &ReadData2E[62:52] && |ReadData2E[51:0]; 
+	assign znan = &ReadData3E[62:52] && |ReadData3E[51:0];

 	// Check if input is denormalized

-	assign xdenorm = ~(|x[62:52]) && |x[51:0]; 
-	assign ydenorm = ~(|y[62:52]) && |y[51:0]; 
-	assign zdenorm = ~(|z[62:52]) && |z[51:0];
+	assign xdenorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0]; 
+	assign ydenorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0]; 
+	assign zdenorm = ~(|ReadData3E[62:52]) && |ReadData3E[51:0];
 	assign proddenorm = &ae & ~xzero & ~yzero; //KEP is the product denormalized

 	// Check if input is infinity

-	assign xinf = &x[62:52] && ~(|x[51:0]); 
-	assign yinf = &y[62:52] && ~(|y[51:0]); 
-	assign zinf = &z[62:52] && ~(|z[51:0]);
+	assign xinf = &ReadData1E[62:52] && ~(|ReadData1E[51:0]); 
+	assign yinf = &ReadData2E[62:52] && ~(|ReadData2E[51:0]); 
+	assign zinf = &ReadData3E[62:52] && ~(|ReadData3E[51:0]);

 	// Check if inputs are all zero
 	// Also forces denormalized inputs to zero.
@ -60,11 +60,11 @@ module special(x, y, z, ae, xzero, yzero, zzero,
 	// to just check if the exponent is zero.
 	
 	// KATHERINE - commented following (21/01/11)
-	// assign xzero = ~(|x[62:0]) || xdenorm;
-	// assign yzero = ~(|y[62:0]) || ydenorm;
-	// assign zzero = ~(|z[62:0]) || zdenorm;
+	// assign xzero = ~(|ReadData1E[62:0]) || xdenorm;
+	// assign yzero = ~(|ReadData2E[62:0]) || ydenorm;
+	// assign zzero = ~(|ReadData3E[62:0]) || zdenorm;
 	// KATHERINE - removed denorm to prevent outputing zero when computing with a denormalized number
-	assign xzero = ~(|x[62:0]);
-	assign yzero = ~(|y[62:0]);
-	assign zzero = ~(|z[62:0]);
+	assign xzero = ~(|ReadData1E[62:0]);
+	assign yzero = ~(|ReadData2E[62:0]);
+	assign zzero = ~(|ReadData3E[62:0]);
 endmodule
--- a/wally-pipelined/src/fpu/FMA/tbgen/results.dat
+++ b/wally-pipelined/src/fpu/FMA/tbgen/results.dat
@ -1,16 +1 @@
-0010000000000000 bf4fdffffff7fffe 800ffffffffffffe 800003fbfffffefe 801003fbfffffefe  Wrong zdenorm 308227
-0010000000000000 be6fffffbffffff7 8000000000000000 800000001fffffc0 800000000fffffe0  Wrong 313753
-001ffffffffffffe 3fddfbffffffffff 000ffffffffffffe 000efdfffffffffd 001efdfffffffffd  Wrong zdenorm 551371
-3befe000ffffffff 800ffffffffffffe 0000000000000000 0000000000000000 8000000000000000  Wrong ydenorm unflw 665575
-000007fffffffffe 3f6ffffffe01fffe 000ffffffffffffe 00000007ffffff7e 00100007ffffff7e  Wrong xdenorm zdenorm 768727
-3fdffffffffffffe 000ffffffffffffe 8000000000000001 7feffffffffffff6 0007fffffffffffe  Wrong ydenorm zdenorm 1049939
-7fe0000000000001 4000000000000000 ffefffffffffffff 7ff0000000000000 7cb8000000000000  Wrong w=+inf 2602745
-000fff000000000f 3ff00800001fffff 8010000000000000 7f7bfe007ff8381e 000006ff801ffe0e  Wrong xdenorm 3117277
-8000000000000001 40211275ffe5ee3c 0000000000000001 fcfe24ebffcbdc78 8000000000000008  Wrong xdenorm zdenorm 3148591
-801fffffffffffff bfdffffffffffffe 0000000000021fff 0000000000021ffe 0010000000021ffe  Wrong zdenorm 3537867
-801ffffffffffffe 0010000000000001 0000000000000000 0000000000000000 8000000000000000  Wrong unflw 3564269
-bca0000000000001 000fffffc000001e 8000000000000000 8000000000000001 8000000000000000  Wrong ydenorm 3717769
-bcafffffffffffff 800ffffffffffffe 8000000000000000 0000000000000002 0000000000000001  Wrong ydenorm 3807413
-7fec5fed92358a74 400000001bffffff ffefc0003ffffffe 7ff0000000000000 7fe8ffdb47bad466  Wrong w=+inf 3889689
-bfdfffffffffffff 3fdf1f3616aa73e1 3fd0000000000001 3fd07064f4aac611 3f7c193d2ab1843f  Wrong 4099063
-3fd07dfffffffffe 8010000000000001 0000000000000001 ffe07dfffffffffb 80041f7fffffffff  Wrong zdenorm 4716133
+c3f000200003fffe 0000000000000001 001ffffffffffffe 80cffc400007fffd 80cffc400007fffc  Wrong FmaResultM=  -64 ydenorm 1119653
--- a/wally-pipelined/src/fpu/FMA/tbgen/tb
+++ b/wally-pipelined/src/fpu/FMA/tbgen/tb
--- a/wally-pipelined/src/fpu/FMA/tbgen/tb.c
+++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.c
@ -20,19 +20,19 @@ void main() {
 		// b68ffff8000000ff_3f9080000007ffff_b6307ffbe0080080_00001
                char ch;
 		int i,j,n;
-		char x[17];
-		char y[17];
-		char z[17];
+		char ReadData1E[17];
+		char ReadData2E[17];
+		char ReadData3E[17];
 		char ans[81];
 		char flags[3];
-		int rn,rz,rm,rp;
-		long stop = 4099063;
+		int FrmE;
+		long stop = 1119653;
 		int debug = 1;
 		//my_string = (char *) malloc (nbytes + 1);
 		//bytes_read = getline (&my_string, &nbytes, stdin);
 	

-		for(n=0; n < 613; n++) {//613 for 10000
+		for(n=0; n < 305; n++) {//613 for 10000
 			if(getline(&ln,&nbytes,fp) < 0 || feof(fp)) break;
 			if(k == stop && debug == 1) break;
 			k++;
@ -41,71 +41,59 @@ void main() {

 		if(!feof(fp)) {

-			strncpy(x,   ln,     16); x[16]=0;
-			strncpy(y,    &ln[17], 16); y[16]=0;
-			strncpy(z,  &ln[34], 16); z[16]=0;
-			// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
+			strncpy(ReadData1E,   ln,     16); ReadData1E[16]=0;
+			strncpy(ReadData2E,    &ln[17], 16); ReadData2E[16]=0;
+			strncpy(ReadData3E,  &ln[34], 16); ReadData3E[16]=0;
+			// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
 			strncpy(ans,  &ln[51], 16); ans[16]=0;
 			strncpy(flags,&ln[68],2);   flags[2]=0;
 		
-			// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
-			fprintf(fq,"    x = 64'h%s;\n",x); 
-			fprintf(fq,"    y = 64'h%s;\n",y); 
-			fprintf(fq,"    z = 64'h%s;\n",z);
+			// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
+			fprintf(fq,"    ReadData1E = 64'h%s;\n",ReadData1E); 
+			fprintf(fq,"    ReadData2E = 64'h%s;\n",ReadData2E); 
+			fprintf(fq,"    ReadData3E = 64'h%s;\n",ReadData3E);
 			fprintf(fq,"    ans = 64'h%s;\n", ans);
 			// fprintf(fq,"    flags = 5'h%s;\n", flags);
 		

 			{
 				//rn=1; rz=0; rm=0; rp=0;
-				fprintf(fq,"    rn = %d;\n",1);
-				fprintf(fq,"    rz = %d;\n", 0);
-				fprintf(fq,"    rm = %d;\n", 0);
-				fprintf(fq,"    rp = %d;\n", 0);
-			}
-			{
-				fprintf(fq,"    earlyres = 64'b0;\n");
-				fprintf(fq,"    earlyressel = 0;\n");
-			}		
-			{
-
-				fprintf(fq,"    bypsel= 2'b0;\n"); //, bysel);
-				fprintf(fq,"    bypplus1 = 0;\n"); //, byp1);
-				fprintf(fq,"    byppostnorm = 0;\n"); //, bypnorm);
+				fprintf(fq,"    FrmE = 3'b000;\n");
 			}
 			fprintf(fq,"#10\n");
 			// IEEE 754-2008 section 6.3 states "When ether an input or result is NaN, this standard does not interpret the sign of a NaN."
-			//fprintf(fq,"	$fwrite(fp, \"%%h %%h %%h %%h \",x,y,w, ans);\n");	
+			//fprintf(fq,"	$fwrite(fp, \"%%h %%h %%h %%h \",ReadData1E,ReadData2E,FmaResultM, ans);\n");	
 			fprintf(fq,"    // IEEE 754-2008 section 6.3 states: \"When ether an input or result is NaN, this\n");
 			fprintf(fq,"    //                                     standard does not interpret the sign of a NaN.\"\n");
-			fprintf(fq,"	wnan = &w[62:52] && |w[51:0]; \n");
-			fprintf(fq,"	xnan = &x[62:52] && |x[51:0]; \n");
-			fprintf(fq,"	ynan = &y[62:52] && |y[51:0]; \n");
-			fprintf(fq,"	znan = &z[62:52] && |z[51:0]; \n");
+			fprintf(fq,"	wnan = &FmaResultM[62:52] && |FmaResultM[51:0]; \n");
+			fprintf(fq,"	xnan = &ReadData1E[62:52] && |ReadData1E[51:0]; \n");
+			fprintf(fq,"	ynan = &ReadData2E[62:52] && |ReadData2E[51:0]; \n");
+			fprintf(fq,"	znan = &ReadData3E[62:52] && |ReadData3E[51:0]; \n");
 			fprintf(fq,"	ansnan = &ans[62:52] && |ans[51:0]; \n");
-			fprintf(fq,"	xnorm = ~(|x[62:52]) && |x[51:0] ? {x[50:0], 1'b0} : x; \n");
-			fprintf(fq,"	ynorm = ~(|y[62:52]) && |y[51:0] ? {y[50:0], 1'b0} : y;\n");
-			fprintf(fq,"	s = ({54'b1,xnorm} + (bypsel  && bypplus1))  *  {54'b1,ynorm}; \n");
-			// fprintf(fq,"    if(!(~(|x[62:52]) && |x[51:0] || ~(|y[62:52]) && |y[51:0])) begin\n"); 
+			fprintf(fq,"	xnorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0] ? {ReadData1E[50:0], 1'b0} : ReadData1E; \n");
+			fprintf(fq,"	ynorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0] ? {ReadData2E[50:0], 1'b0} : ReadData2E;\n");
+			// fprintf(fq,"	s = ({54'b1,xnorm} + (bypsel  && bypplus1))  *  {54'b1,ynorm}; \n");
+			// fprintf(fq,"    if(!(~(|ReadData1E[62:52]) && |ReadData1E[51:0] || ~(|ReadData2E[62:52]) && |ReadData2E[51:0])) begin\n"); 
 																							// not looknig at negative zero results right now
-			//fprintf(fq,"	  if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) && !(w == 64'h8000000000000000 && ans == 64'b0)) begin\n"); 
-			// fprintf(fq,"	if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) ) begin\n"); 
-			fprintf(fq,"	if((!wnan && (w != ans)) || (wnan && ansnan && ~(((xnan && (w[62:0] == {x[62:52],1'b1,x[50:0]})) || (ynan && (w[62:0] == {y[62:52],1'b1,y[50:0]}))  || (znan && (w[62:0] == {z[62:52],1'b1,z[50:0]})) || (w[62:0] == ans[62:0])) ))) begin\n"); 
-			fprintf(fq,"		$fwrite(fp, \"%%h %%h %%h %%h %%h  Wrong \",x,y, z, w, ans);\n");
+			//fprintf(fq,"	  if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) && !(FmaResultM == 64'h8000000000000000 && ans == 64'b0)) begin\n"); 
+			// fprintf(fq,"	if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) ) begin\n"); 
+			fprintf(fq,"	if((!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {ReadData1E[62:52],1'b1,ReadData1E[50:0]})) || (ynan && (FmaResultM[62:0] == {ReadData2E[62:52],1'b1,ReadData2E[50:0]}))  || (znan && (FmaResultM[62:0] == {ReadData3E[62:52],1'b1,ReadData3E[50:0]})) || (FmaResultM[62:0] == ans[62:0])) ))) begin\n"); 
+			fprintf(fq,"		$fwrite(fp, \"%%h %%h %%h %%h %%h  Wrong \",ReadData1E,ReadData2E, ReadData3E, FmaResultM, ans);\n");
 			//fprintf(fq,"		$fwrite(fp, \"%%h \",s);\n");
-			fprintf(fq,"		if(w == 64'h8000000000000000) $fwrite(fp, \"w=-zero \");\n");
-			fprintf(fq,"		if(~(|x[62:52]) && |x[51:0]) $fwrite(fp, \"xdenorm \");\n");
-			fprintf(fq,"		if(~(|y[62:52]) && |y[51:0]) $fwrite(fp, \"ydenorm \");\n");
-			fprintf(fq,"		if(~(|z[62:52]) && |z[51:0]) $fwrite(fp, \"zdenorm \");\n");
-			fprintf(fq,"		if(invalid != 0) $fwrite(fp, \"invld \");\n");
-			fprintf(fq,"		if(overflow != 0) $fwrite(fp, \"ovrflw \");\n");
-			fprintf(fq,"		if(underflow != 0) $fwrite(fp, \"unflw \");\n");
-			fprintf(fq,"		if(w == 64'hFFF0000000000000) $fwrite(fp, \"w=-inf \");\n");
-			fprintf(fq,"		if(w == 64'h7FF0000000000000) $fwrite(fp, \"w=+inf \");\n");
-			fprintf(fq,"		if(w >  64'h7FF0000000000000 && w <  64'h7FF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
-			fprintf(fq,"		if(w >  64'hFFF8000000000000 && w <  64'hFFF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
-			fprintf(fq,"		if(w >= 64'h7FF8000000000000 && w <= 64'h7FFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
-			fprintf(fq,"		if(w >= 64'hFFF8000000000000 && w <= 64'hFFFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
+			fprintf(fq,"		$fwrite(fp, \"FmaResultM=%%d \",$signed(aligncnt));\n");
+			fprintf(fq,"		if(FmaResultM == 64'h8000000000000000) $fwrite(fp, \"FmaResultM=-zero \");\n");
+			fprintf(fq,"		if(~(|ReadData1E[62:52]) && |ReadData1E[51:0]) $fwrite(fp, \"xdenorm \");\n");
+			fprintf(fq,"		if(~(|ReadData2E[62:52]) && |ReadData2E[51:0]) $fwrite(fp, \"ydenorm \");\n");
+			fprintf(fq,"		if(~(|ReadData3E[62:52]) && |ReadData3E[51:0]) $fwrite(fp, \"zdenorm \");\n");
+			fprintf(fq,"		if(FmaFlagsM[4] != 0) $fwrite(fp, \"invld \");\n");
+			fprintf(fq,"		if(FmaFlagsM[2] != 0) $fwrite(fp, \"ovrflw \");\n");
+			fprintf(fq,"		if(FmaFlagsM[1] != 0) $fwrite(fp, \"unflw \");\n");
+			fprintf(fq,"		if(FmaResultM == 64'hFFF0000000000000) $fwrite(fp, \"FmaResultM=-inf \");\n");
+			fprintf(fq,"		if(FmaResultM == 64'h7FF0000000000000) $fwrite(fp, \"FmaResultM=+inf \");\n");
+			fprintf(fq,"		if(FmaResultM >  64'h7FF0000000000000 && FmaResultM <  64'h7FF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
+			fprintf(fq,"		if(FmaResultM >  64'hFFF0000000000000 && FmaResultM <  64'hFFF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n"); /* lower bound is -inf (FFF0...), mirroring the positive range above; the old FFF8.. > x > FFF8.. test could never be true */
+			fprintf(fq,"		if(FmaResultM >= 64'h7FF8000000000000 && FmaResultM <= 64'h7FFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
+			fprintf(fq,"		if(FmaResultM >= 64'hFFF8000000000000 && FmaResultM <= 64'hFFFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");

 			fprintf(fq,"		if(ans == 64'hFFF0000000000000) $fwrite(fp, \"ans=-inf \");\n");
 			fprintf(fq,"		if(ans == 64'h7FF0000000000000) $fwrite(fp, \"ans=+inf \");\n");
--- a/wally-pipelined/src/fpu/FMA/tbgen/tb.v
+++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.v
--- a/wally-pipelined/src/fpu/FMA/tbgen/tbhead.v
+++ b/wally-pipelined/src/fpu/FMA/tbgen/tbhead.v
@ -2,38 +2,27 @@
 module tb;


- reg 		[63:0]		x;
- reg 		[63:0]		y;
- reg 		[63:0]		z;
- reg 		[63:0]		ans;
- reg 						rn;
- reg 						rz;
- reg 						rm;
- reg 						rp;
- reg 		[63:0]		earlyres;
- reg 						earlyressel;
- reg 		[1:0]			bypsel;
- reg 						bypplus1;
- reg 						byppostnorm;
- wire 	[63:0]		w;
- wire 	[63:0]		wbypass;
- wire 		 			invalid;
- wire 					overflow;
- wire 					underflow;
- wire 					inexact;
+ reg 	[63:0]		ReadData1E;
+ reg 	[63:0]		ReadData2E;
+ reg 	[63:0]		ReadData3E;
+ reg 	[63:0]		ans;
+ reg 	[2:0]		FrmE;
+ wire 	[63:0]		FmaResultM;
+ wire 	[4:0]	 	FmaFlagsM;

 integer fp;
 reg wnan;
 reg xnan;
 reg ynan;
 reg znan;
+wire [12:0] aligncnt;
 reg ansnan;
 reg		[105:0]		s;				//	partial product 2	
 reg		[51:0] 		xnorm;
 reg 		[51:0] 		ynorm;

 localparam period = 20;  
-fmac UUT(.*);
+fma UUT(.*);


 initial 
--- a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
@ -1 +1 @@
-testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat
+testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat
--- a/wally-pipelined/src/fpu/csa.sv
+++ b/wally-pipelined/src/fpu/csa.sv
@ -50,7 +50,7 @@ module FA_array (S, C, A, B, Ci) ;
   genvar 	  i;
   generate
      for (i = 0; i < n; i = i + 1) begin : index
-	 fa FA1(.S(S[i]), .C(C[i]), .A(A[i]), .B(B[i]), .Ci(Ci[i]));
+	 fa FA1(.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
      end
   endgenerate
   
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -22,6 +22,7 @@ module fpu (
  //signals, modules, and combinational logic closely defined.

  //used for OSU DP-size hardware to wally XLEN interfacing
+
  integer XLENDIFF;
  assign XLENDIFF = `XLEN - 64;
  integer XLENDIFFN;
@ -465,13 +466,18 @@ module fpu (
  always_comb begin
           
  //zero extension  
-  if(`XLEN > 64) begin
-      FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
-  end
+
+// Teo 04/13/2021
+// Commented out XLENDIFF{1'b0} due to error:
+// Repetition multiplier must be constant.
+
+  //if(`XLEN > 64) begin
+  //    FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
+  //end
  //truncate
-  else begin
+  //else begin
      FPUResultW <= FPUResultDirW[63:64-`XLEN];
-  end
+  //end

  end  

--- a/wally-pipelined/src/mmu/cam_line.sv
+++ b/wally-pipelined/src/mmu/cam_line.sv
@ -24,9 +24,6 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////

-`include "wally-config.vh"
-`include "wally-constants.vh"
-
 module cam_line #(parameter KEY_BITS = 20,
                  parameter HIGH_SEGMENT_BITS = 10) (
  input                 clk, reset,
@ -74,6 +71,6 @@ module cam_line #(parameter KEY_BITS = 20,
  // should automatically match.
  page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, VirtualPageNumberQuery);

-  assign Match = ({1'b1, VirtualPageNumberQuery} == Key);
+  assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key});

-endmodule
+endmodule
--- a/wally-pipelined/src/mmu/tlb.sv
+++ b/wally-pipelined/src/mmu/tlb.sv
@ -4,8 +4,8 @@
 // Written: jtorrey@hmc.edu 16 February 2021
 // Modified:
 //
-// Purpose: Example translation lookaside buffer
-//           Cache of virtural-to-physical address translations
+// Purpose: Translation lookaside buffer
+//          Cache of virtual-to-physical address translations
 // 
 // A component of the Wally configurable RISC-V project.
 // 
@ -24,9 +24,6 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////

-`include "wally-config.vh"
-`include "wally-constants.vh"
-
 /**
 * sv32 specs
 * ----------
@ -52,6 +49,9 @@
 *   least recently)
 */

+`include "wally-config.vh"
+`include "wally-constants.vh"
+
 // The TLB will have 2**ENTRY_BITS total entries
 module tlb #(parameter ENTRY_BITS = 3) (
  input              clk, reset,
@ -127,7 +127,8 @@ module tlb #(parameter ENTRY_BITS = 3) (
  assign PageOffset        = VirtualAddress[11:0];

  // Currently use random replacement algorithm
-  tlb_rand rdm(.*);
+  // tlb_rand rdm(.*);
+  tlb_lru lru(.*);

  tlb_ram #(ENTRY_BITS) ram(.*);
  tlb_cam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) cam(.*);
--- a/wally-pipelined/src/mmu/tlb_cam.sv
+++ b/wally-pipelined/src/mmu/tlb_cam.sv
@ -64,6 +64,8 @@ module tlb_cam #(parameter ENTRY_BITS = 3,
  endgenerate

  // In case there are multiple matches in the CAM, select only one
+  // *** it might be guaranteed that the CAM will never have multiple matches.
+  // If so, this is just an encoder
  priority_encoder #(ENTRY_BITS) match_priority(Matches, VPNIndex);

  assign CAMHit = |Matches & ~TLBFlush;
--- a/wally-pipelined/src/mmu/tlb_lru.sv
+++ b/wally-pipelined/src/mmu/tlb_lru.sv
@ -0,0 +1,69 @@
+///////////////////////////////////////////
+// tlb_lru.sv
+//
+// Written: tfleming@hmc.edu & jtorrey@hmc.edu 16 February 2021
+// Modified:
+//
+// Purpose: Implementation of bit pseudo least-recently-used algorithm for
+//          cache evictions. Outputs the index of the next entry to be written.
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+module tlb_lru #(parameter ENTRY_BITS = 3) (
+  input                   clk, reset,
+  input                   TLBWrite,   // when high, the written line (WriteIndex) is the accessed line
+  input                   TLBFlush,   // passed to the state register as its third argument (presumably the clear input of flopenrc -- confirm)
+  input  [ENTRY_BITS-1:0] VPNIndex,   // index of the line being read; decoded into ReadLineOneHot
+  input                   CAMHit,     // together with TLBWrite, enables the state update
+  output [ENTRY_BITS-1:0] WriteIndex  // output of the priority encoder over ~RUBits
+);
+
+  localparam NENTRIES = 2**ENTRY_BITS;
+
+  // One "recently-used" bit per TLB entry; an access ORs the accessed line's bit in
+  logic [NENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed;
+
+  // One-hot encodings of the read line, the written line, and whichever of the two is being accessed
+  logic [NENTRIES-1:0] ReadLineOneHot, WriteLineOneHot, AccessLineOneHot;
+  
+  // High if the current access would leave every RU bit set
+  logic                AllUsed;
+
+  // Convert the read and write indices to one-hot encodings
+  decoder #(ENTRY_BITS) read_decoder(VPNIndex, ReadLineOneHot);
+  // *** should output writelineonehot so we don't have to decode WriteIndex outside
+  decoder #(ENTRY_BITS) write_decoder(WriteIndex, WriteLineOneHot);
+
+  // Pick a line whose RU bit is clear as the next line to write
+  priority_encoder #(ENTRY_BITS) first_nru(~RUBits, WriteIndex);
+
+  // The accessed line is the written line on a write, otherwise the read line
+  assign AccessLineOneHot = (TLBWrite) ? WriteLineOneHot : ReadLineOneHot;
+
+  // Raise the bit of the line being accessed
+  assign RUBitsAccessed = AccessLineOneHot | RUBits;
+
+  // If this access would set every RU bit, restart the record with only the accessed line marked
+  assign AllUsed = &(RUBitsAccessed);
+  assign RUBitsNext = (AllUsed) ? AccessLineOneHot : RUBitsAccessed;
+
+  // State register: enabled on a CAM hit or a TLB write
+  flopenrc #(NENTRIES) lru_state(clk, reset, TLBFlush, (CAMHit || TLBWrite),
+    RUBitsNext, RUBits);
+
+endmodule
--- a/wally-pipelined/src/mmu/tlb_ram.sv
+++ b/wally-pipelined/src/mmu/tlb_ram.sv
@ -57,4 +57,4 @@ module tlb_ram #(parameter ENTRY_BITS = 3) (
      ram[i] = `XLEN'b0;
  end

-endmodule
+endmodule
--- a/wally-pipelined/src/mmu/tlb_rand.sv
+++ b/wally-pipelined/src/mmu/tlb_rand.sv
@ -29,7 +29,7 @@ module tlb_rand #(parameter ENTRY_BITS = 3) (
 );

  logic [31:0] data;
-  assign data = $urandom;
+  assign data = 32'b0;
  assign WriteIndex = data[ENTRY_BITS-1:0];
  
 endmodule
--- a/wally-pipelined/src/muldiv/div.sv
+++ b/wally-pipelined/src/muldiv/div.sv
@ -1479,21 +1479,15 @@ module shifter_l64 (Z, A, Shift);
   logic [63:0]        stage3;
   logic [63:0]        stage4;
   logic [63:0]        stage5;   
-   logic [31:0]        thirtytwozeros = 32'h0;
-   logic [15:0]        sixteenzeros = 16'h0;
-   logic [ 7:0]        eightzeros = 8'h0;
-   logic [ 3:0]        fourzeros = 4'h0;
-   logic [ 1:0]        twozeros = 2'b00;
-   logic 	       onezero = 1'b0;   
   
   output logic [63:0] Z;      
   
-   mux2 #(64) mx01(A,      {A[31:0], thirtytwozeros}, Shift[5], stage1);   
-   mux2 #(64) mx02(stage1, {stage1[47:0], sixteenzeros}, Shift[4], stage2);
-   mux2 #(64) mx03(stage2, {stage2[55:0], eightzeros}, Shift[3], stage3);
-   mux2 #(64) mx04(stage3, {stage3[59:0], fourzeros}, Shift[2], stage4);
-   mux2 #(64) mx05(stage4, {stage4[61:0], twozeros}, Shift[1], stage5);
-   mux2 #(64) mx06(stage5, {stage5[62:0], onezero}, Shift[0], Z);
+   mux2 #(64) mx01(A,      {A[31:0], 32'h0}, Shift[5], stage1);   
+   mux2 #(64) mx02(stage1, {stage1[47:0], 16'h0}, Shift[4], stage2);
+   mux2 #(64) mx03(stage2, {stage2[55:0], 8'h0}, Shift[3], stage3);
+   mux2 #(64) mx04(stage3, {stage3[59:0], 4'h0}, Shift[2], stage4);
+   mux2 #(64) mx05(stage4, {stage4[61:0], 2'h0}, Shift[1], stage5);
+   mux2 #(64) mx06(stage5, {stage5[62:0], 1'h0}, Shift[0], Z);

 endmodule // shifter_l64

@ -1507,21 +1501,15 @@ module shifter_r64 (Z, A, Shift);
   logic [63:0]        stage3;
   logic [63:0]        stage4;
   logic [63:0]        stage5;   		  
-   logic [31:0]        thirtytwozeros = 32'h0;		  
-   logic [15:0]        sixteenzeros = 16'h0;
-   logic [ 7:0]        eightzeros = 8'h0;
-   logic [ 3:0]        fourzeros = 4'h0;
-   logic [ 1:0]        twozeros = 2'b00;
-   logic 	       onezero = 1'b0;   
   
   output logic [63:0] Z;
   
-   mux2 #(64) mx01(A, {thirtytwozeros, A[63:32]}, Shift[5], stage1);		  
-   mux2 #(64) mx02(stage1, {sixteenzeros, stage1[63:16]}, Shift[4], stage2);
-   mux2 #(64) mx03(stage2, {eightzeros, stage2[63:8]}, Shift[3], stage3);
-   mux2 #(64) mx04(stage3, {fourzeros, stage3[63:4]}, Shift[2], stage4);
-   mux2 #(64) mx05(stage4, {twozeros, stage4[63:2]}, Shift[1], stage5);
-   mux2 #(64) mx06(stage5, {onezero, stage5[63:1]},  Shift[0], Z);
+   mux2 #(64) mx01(A, {32'h0, A[63:32]}, Shift[5], stage1);		  
+   mux2 #(64) mx02(stage1, {16'h0, stage1[63:16]}, Shift[4], stage2);
+   mux2 #(64) mx03(stage2, {8'h0, stage2[63:8]}, Shift[3], stage3);
+   mux2 #(64) mx04(stage3, {4'h0, stage3[63:4]}, Shift[2], stage4);
+   mux2 #(64) mx05(stage4, {2'h0, stage4[63:2]}, Shift[1], stage5);
+   mux2 #(64) mx06(stage5, {1'h0, stage5[63:1]},  Shift[0], Z);
   
 endmodule // shifter_r64

@ -1534,19 +1522,14 @@ module shifter_l32 (Z, A, Shift);
   logic [31:0]        stage2;
   logic [31:0]        stage3;
   logic [31:0]        stage4;
-   logic [15:0]        sixteenzeros = 16'h0;
-   logic [ 7:0]        eightzeros = 8'h0;
-   logic [ 3:0]        fourzeros = 4'h0;
-   logic [ 1:0]        twozeros = 2'b00;
-   logic 	       onezero = 1'b0;   
   
   output logic [31:0] Z;      

-   mux2 #(32) mx01(A,      {A[15:0], sixteenzeros},    Shift[4], stage1);
-   mux2 #(32) mx02(stage1, {stage1[23:0], eightzeros}, Shift[3], stage2);
-   mux2 #(32) mx03(stage2, {stage2[27:0], fourzeros},  Shift[2], stage3);
-   mux2 #(32) mx04(stage3, {stage3[29:0], twozeros},   Shift[1], stage4);
-   mux2 #(32) mx05(stage4, {stage4[30:0], onezero},    Shift[0], Z);
+   mux2 #(32) mx01(A,      {A[15:0], 16'h0},    Shift[4], stage1);
+   mux2 #(32) mx02(stage1, {stage1[23:0], 8'h0}, Shift[3], stage2);
+   mux2 #(32) mx03(stage2, {stage2[27:0], 4'h0},  Shift[2], stage3);
+   mux2 #(32) mx04(stage3, {stage3[29:0], 2'h0},   Shift[1], stage4);
+   mux2 #(32) mx05(stage4, {stage4[30:0], 1'h0},    Shift[0], Z);

 endmodule // shifter_l32

@ -1559,19 +1542,14 @@ module shifter_r32 (Z, A, Shift);
   logic [31:0]        stage2;
   logic [31:0]        stage3;
   logic [31:0]        stage4;
-   logic [15:0]        sixteenzeros = 16'h0;
-   logic [ 7:0]        eightzeros = 8'h0;
-   logic [ 3:0]        fourzeros = 4'h0;
-   logic [ 1:0]        twozeros = 2'b00;
-   logic 	       onezero = 1'b0;   
   
   output logic [31:0] Z;
   
-   mux2 #(32) mx01(A,      {sixteenzeros, A[31:16]},   Shift[4], stage1);
-   mux2 #(32) mx02(stage1, {eightzeros, stage1[31:8]}, Shift[3], stage2);
-   mux2 #(32) mx03(stage2, {fourzeros, stage2[31:4]},  Shift[2], stage3);
-   mux2 #(32) mx04(stage3, {twozeros, stage3[31:2]},   Shift[1], stage4);
-   mux2 #(32) mx05(stage4, {onezero, stage4[31:1]},    Shift[0], Z);
+   mux2 #(32) mx01(A,      {16'h0, A[31:16]},   Shift[4], stage1);
+   mux2 #(32) mx02(stage1, {8'h0, stage1[31:8]}, Shift[3], stage2);
+   mux2 #(32) mx03(stage2, {4'h0, stage2[31:4]},  Shift[2], stage3);
+   mux2 #(32) mx04(stage3, {2'h0, stage3[31:2]},   Shift[1], stage4);
+   mux2 #(32) mx05(stage4, {1'h0, stage4[31:1]},    Shift[0], Z);
   
 endmodule // shifter_r32

--- a/wally-pipelined/src/privileged/csrc.sv
+++ b/wally-pipelined/src/privileged/csrc.sv
@ -80,7 +80,7 @@ module csrc (

            for (j=0; j<= `COUNTERS; j = j+1) begin 
                // Write enables
-                if (j !==1) begin
+                if (j != 1) begin
                    assign WriteHPMCOUNTERM[j] = CSRMWriteM && (CSRAdrM == MHPMCOUNTER[j]);
                    // Count Signals 
                    assign HPMCOUNTERPlusM[j] = HPMCOUNTER_REGW[j] + {63'b0, MCOUNTEN[j] & ~MCOUNTINHIBIT_REGW[j]}; 
--- a/wally-pipelined/src/privileged/csri.sv
+++ b/wally-pipelined/src/privileged/csri.sv
@ -49,13 +49,13 @@ module csri #(parameter
  // assumes no N-mode user interrupts

  always_comb begin
-    IntInM     = 0; // *** does this really work
-    IntInM[11] = ExtIntM & ~MIDELEG_REGW[9];   // MEIP
-    IntInM[9]  = ExtIntM &  MIDELEG_REGW[9];   // SEIP
-    IntInM[7]  = TimerIntM & ~MIDELEG_REGW[5]; // MTIP
-    IntInM[5]  = TimerIntM &  MIDELEG_REGW[5]; // STIP
-    IntInM[3]  = SwIntM & ~MIDELEG_REGW[1];    // MSIP
-    IntInM[1]  = SwIntM &  MIDELEG_REGW[1];    // SSIP
+    IntInM      = 0; // *** does this overwriting technique really synthesize
+    IP_REGW[11] = ExtIntM & ~MIDELEG_REGW[9];   // MEIP
+    IntInM[9]   = ExtIntM &  MIDELEG_REGW[9];   // SEIP
+    IntInM[7]   = TimerIntM & ~MIDELEG_REGW[5]; // MTIP
+    IntInM[5]   = TimerIntM &  MIDELEG_REGW[5]; // STIP
+    IntInM[3]   = SwIntM & ~MIDELEG_REGW[1];    // MSIP
+    IntInM[1]   = SwIntM &  MIDELEG_REGW[1];    // SSIP
   end

  // Interrupt Write Enables
@ -77,14 +77,14 @@ module csri #(parameter
      assign SIP_WRITE_MASK = 12'h000;
    end
    always @(posedge clk, posedge reset) begin
-      if (reset)          IP_REGW <= 12'b0;
-      else if (WriteMIPM) IP_REGW <= (CSRWriteValM & MIP_WRITE_MASK) | IntInM; // MTIP unclearable
-      else if (WriteSIPM) IP_REGW <= (CSRWriteValM & SIP_WRITE_MASK) | IntInM; // MTIP unclearable
+      if (reset)          IP_REGW[9:0] <= 10'b0;
+      else if (WriteMIPM) IP_REGW[9:0] <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
+      else if (WriteSIPM) IP_REGW[9:0] <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
 //      else if (WriteUIPM) IP_REGW = (CSRWriteValM & 12'hBBB) | (NextIPM & 12'h080); // MTIP unclearable
-      else                IP_REGW <= IP_REGW | IntInM; // *** check this turns off interrupts properly even when MIDELEG changes
+      else                IP_REGW[9:0] <= IP_REGW[9:0] | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes
    end
    always @(posedge clk, posedge reset) begin
-      if (reset)              IE_REGW <= 12'b0;
+      if (reset)          IE_REGW <= 12'b0;
      else if (WriteMIEM) IE_REGW <= (CSRWriteValM & 12'hAAA); // MIE controls M and S fields
      else if (WriteSIEM) IE_REGW <= (CSRWriteValM & 12'h222) | (IE_REGW & 12'h888); // only S fields
 //      else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field
--- a/wally-pipelined/src/privileged/privileged.sv
+++ b/wally-pipelined/src/privileged/privileged.sv
@ -40,7 +40,7 @@ module privileged (
  input  logic             InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
  input  logic [3:0]       InstrClassM,
  input  logic             PrivilegedM,
-  input  logic             InstrPageFaultM, LoadPageFaultM, StorePageFaultM,
+  input  logic             InstrPageFaultF, LoadPageFaultM, StorePageFaultM,
  input  logic             InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
  input  logic             LoadMisalignedFaultM, LoadAccessFaultM,
  input  logic             StoreMisalignedFaultM, StoreAccessFaultM,
@ -62,8 +62,9 @@ module privileged (

  logic uretM, sretM, mretM, ecallM, ebreakM, wfiM, sfencevmaM;
  logic IllegalCSRAccessM;
-  logic  IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
-  logic       InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
+  logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
+  logic InstrPageFaultD, InstrPageFaultE, InstrPageFaultM;
+  logic InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
  logic IllegalInstrFaultM;

  logic BreakpointFaultM, EcallFaultM;
@ -129,13 +130,15 @@ module privileged (
  // assign StorePageFaultM = 0;

  // pipeline fault signals
-  flopenrc #(1) faultregD(clk, reset, FlushD, ~StallD, InstrAccessFaultF, InstrAccessFaultD);
-  flopenrc #(2) faultregE(clk, reset, FlushE, ~StallE,
-                           {IllegalIEUInstrFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
-                           {IllegalIEUInstrFaultE, InstrAccessFaultE});
-  flopenrc #(2) faultregM(clk, reset, FlushM, ~StallM,
-                         {IllegalIEUInstrFaultE, InstrAccessFaultE},
-                         {IllegalIEUInstrFaultM, InstrAccessFaultM});
+  flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD,
+                  {InstrPageFaultF, InstrAccessFaultF},
+                  {InstrPageFaultD, InstrAccessFaultD});
+  flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE,
+                  {IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
+                  {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE});
+  flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM,
+                  {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE},
+                  {IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM});

  trap trap(.*);

--- a/wally-pipelined/src/privileged/trap.sv
+++ b/wally-pipelined/src/privileged/trap.sv
@ -49,7 +49,7 @@ module trap (
  logic InterruptM;

  // Determine pending enabled interrupts
-  assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) || STATUS_MIE; // if M ints enabled or lower priv 3.1.9
-  assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) || STATUS_SIE; // if S ints enabled or lower priv 3.1.9
+  assign MIntGlobalEnM = {12{(PrivilegeModeW != `M_MODE) || STATUS_MIE}}; // if M ints enabled or lower priv 3.1.9; replicated to 12 bits for the 12'h888 mask below
+  assign SIntGlobalEnM = {12{(PrivilegeModeW == `U_MODE) || STATUS_SIE}}; // if S ints enabled or lower priv 3.1.9; replicated to 12 bits so the 12'h222 mask below is not ANDed with a zero-extended 1-bit value
  assign PendingIntsM = (MIP_REGW & MIE_REGW) & ((MIntGlobalEnM & 12'h888) | (SIntGlobalEnM & 12'h222));
  assign InterruptM = |PendingIntsM; // interrupt if any sources are pending
--- a/wally-pipelined/src/uncore/uart.sv
+++ b/wally-pipelined/src/uncore/uart.sv
@ -41,13 +41,15 @@ module uart (

  // UART interface signals
  logic [2:0]      A;
-  logic            MEMRb, MEMWb;
+  logic            MEMRb, MEMWb, memread, memwrite;
  logic [7:0]      Din, Dout;

  // rename processor interface signals to match PC16550D and provide one-byte interface
-  flopr #(1)  memreadreg(HCLK, ~HRESETn, ~(HSELUART & ~HWRITE), MEMRb);
-  flopr #(1) memwritereg(HCLK, ~HRESETn, ~(HSELUART &  HWRITE), MEMWb);
+  flopr #(1)  memreadreg(HCLK, ~HRESETn, (HSELUART & ~HWRITE), memread);
+  flopr #(1) memwritereg(HCLK, ~HRESETn, (HSELUART &  HWRITE), memwrite);
  flopr #(3)   haddrreg(HCLK, ~HRESETn, HADDR[2:0], A);
+  assign MEMRb = ~memread;
+  assign MEMWb = ~memwrite;

  assign HRESPUART = 0; // OK
  assign HREADYUART = 1; // should idle high during address phase and respond high when done; will need to be modified if UART ever needs more than 1 cycle to do something
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@ -24,6 +24,7 @@
 ///////////////////////////////////////////

 `include "wally-config.vh"
+`include "wally-constants.vh"
 /* verilator lint_on UNUSED */

 module wallypipelinedhart (
@ -76,7 +77,7 @@ module wallypipelinedhart (
  logic InstrMisalignedFaultM;
  logic DataMisalignedM;
  logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
-  logic InstrPageFaultM, LoadPageFaultM, StorePageFaultM;
+  logic InstrPageFaultF, LoadPageFaultM, StorePageFaultM;
  logic LoadMisalignedFaultM, LoadAccessFaultM;
  logic StoreMisalignedFaultM, StoreAccessFaultM;
  logic [`XLEN-1:0] InstrMisalignedAdrM;
--- a/wally-pipelined/testbench/testbench-busybear.sv
+++ b/wally-pipelined/testbench/testbench-busybear.sv
@ -279,14 +279,15 @@ module testbench_busybear();
    end
  end

-  string sepc_lit = "SEPC";
  `define CHECK_CSR2(CSR, PATH) \
    string CSR; \
    logic [63:0] expected``CSR``; \
    //CSR checking \
    always @(``PATH``.``CSR``_REGW) begin \
        if ($time > 1) begin \
-          if (sepc_lit.icompare(`"CSR`")) begin #1; end \
+          if ("SEPC" == `"CSR`") begin #1; end \
+          if ("SCAUSE" == `"CSR`") begin #2; end \
+          if ("SSTATUS" == `"CSR`") begin #3; end \
          scan_file_csr = $fscanf(data_file_csr, "%s\n", CSR); \
          scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \
          if(CSR.icompare(`"CSR`")) begin \
@ -463,8 +464,10 @@ module testbench_busybear();
              32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ
              32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J
                speculative = 1;
-              32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK:
+              32'bXXXXXXXXXXXXXXXX1001000000000010, // C.EBREAK:
+              32'bXXXXXXXXXXXXXXXXX000XXXXX1110011: // Something that's not CSRR*
                speculative = 0; // tbh don't really know what should happen here
+              32'b000110000000XXXXXXXXXXXXX1110011, // CSR* SATP, *
              32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR
              32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL
                speculative = 1;
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@ -352,7 +352,7 @@ module testbench();
  };

  string tests64periph[] = '{
-    "rv64i-periph/WALLY-PLIC", "2000"
+    "rv64i-periph/WALLY-PLIC", "2080"
  };

  string tests32periph[] = '{
@ -402,7 +402,7 @@ module testbench();
      if (TESTSPERIPH) begin 
        tests = tests32periph;
      end else begin
-          tests = {tests32i,tests32periph};
+          tests = {tests32i};//,tests32periph}; *** broken at the moment
          if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};    
          else                       tests = {tests, tests32iNOc};
          if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
--- a/wally-pipelined/testbench/testbench-peripherals.sv
+++ b/wally-pipelined/testbench/testbench-peripherals.sv
@ -1,413 +0,0 @@
-///////////////////////////////////////////
-// testbench-imperas.sv
-//
-// Written: David_Harris@hmc.edu 9 January 2021
-// Modified: 
-//
-// Purpose: Wally Testbench and helper modules
-//          Applies test programs from the Imperas suite
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
-// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
-// is furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
-// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-///////////////////////////////////////////
-
-`include "wally-config.vh"
-
-module testbench();
-  parameter DEBUG = 0;
-  parameter TESTSBP = 0;
-  
-  logic        clk;
-  logic        reset;
-
-  int test, i, errors, totalerrors;
-  logic [31:0] sig32[0:10000];
-  logic [`XLEN-1:0] signature[0:10000];
-  logic [`XLEN-1:0] testadr;
-  string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
-  logic [31:0] InstrW;
-  logic [`XLEN-1:0] meminit;
-  
-  string tests64i[] = {       
-    "peripherals/WALLY-PLIC", "2000"          
-    //"peripherals/WALLY-UART", "2000"
-  };
-  string tests64ic[] = {
-  };
-  string tests64iNOc[] = {
-  };
-  string tests64m[] = {
-  };
-  string tests64a[] = {
-  };
-  string tests32a[] = {
-  };
-  string tests32m[] = {
-  };
-  string tests32ic[] = {
-  };
-  string tests32iNOc[] = {
-  };
-  string tests32i[] = {
-  };
-  string testsBP64[] = {
-	};
-  string tests64p[] = {
-  };
-
-  string tests[];
-  string ProgramAddrMapFile, ProgramLabelMapFile;
-  logic [`AHBW-1:0] HRDATAEXT;
-  logic             HREADYEXT, HRESPEXT;
-  logic [31:0]      HADDR;
-  logic [`AHBW-1:0] HWDATA;
-  logic             HWRITE;
-  logic [2:0]       HSIZE;
-  logic [2:0]       HBURST;
-  logic [3:0]       HPROT;
-  logic [1:0]       HTRANS;
-  logic             HMASTLOCK;
-  logic             HCLK, HRESETn;
-  logic [`XLEN-1:0] PCW;
-  
-  flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW);
-  flopenr  #(32)   InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW,  dut.hart.ifu.InstrM, InstrW);
-  // pick tests based on modes supported
-  initial begin
-    if (`XLEN == 64) begin // RV64
-      if (TESTSBP) begin
-	      tests = testsBP64;	
-      end else begin 
-	      tests = {tests64i};
-        if (`C_SUPPORTED) tests = {tests, tests64ic};
-        else              tests = {tests, tests64iNOc};
-        if (`M_SUPPORTED) tests = {tests, tests64m};
-        // if (`F_SUPPORTED) tests = {tests64f, tests};
-        // if (`D_SUPPORTED) tests = {tests64d, tests};
-        if (`A_SUPPORTED) tests = {tests, tests64a};
-      end
- //     tests = {tests64a, tests};
-      tests = {tests, tests64p};
-    end else begin // RV32
-      // *** add the 32 bit bp tests
-      tests = {tests32i};
-      if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};    
-      else                       tests = {tests, tests32iNOc};
-      if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
-      // if (`F_SUPPORTED) tests = {tests32f, tests};
-      if (`A_SUPPORTED) tests = {tests, tests32a};
-    end
-
-    // tests = tests64p;
-  end
-
-
-  string signame, memfilename;
-
-  logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
-  logic UARTSin, UARTSout;
-
-  // instantiate device to be tested
-  assign GPIOPinsIn = 0;
-  assign UARTSin = 1;
-  assign HREADYEXT = 1;
-  assign HRESPEXT = 0;
-  assign HRDATAEXT = 0;
-
-  wallypipelinedsoc dut(.*); 
-
-  // Track names of instructions
-  instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
-                dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
-                dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName,
-                InstrEName, InstrMName, InstrWName);
-
-  // initialize tests
-  initial
-    begin
-      test = 0;
-      totalerrors = 0;
-      testadr = 0;
-      // fill memory with defined values to reduce Xs in simulation
-      if (`XLEN == 32) meminit = 32'hFEDC0123;
-      else meminit = 64'hFEDCBA9876543210;
-      for (i=0; i<=65535; i = i+1) begin
-        //dut.imem.RAM[i] = meminit;
-       // dut.uncore.RAM[i] = meminit;
-      end
-      // read test vectors into memory
-      memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
-      $readmemh(memfilename, dut.imem.RAM);
-      $readmemh(memfilename, dut.uncore.dtim.RAM);
-      ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
-      ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
-      $display("Read memfile %s", memfilename);
-      reset = 1; # 42; reset = 0;
-    end
-
-  // generate clock to sequence tests
-  always
-    begin
-      clk = 1; # 5; clk = 0; # 5;
-    end
-   
-  // check results
-  always @(negedge clk)
-    begin    
-      if (dut.hart.priv.EcallFaultM && 
-          (dut.hart.ieu.dp.regf.rf[3] == 1 || (dut.hart.ieu.dp.regf.we3 && dut.hart.ieu.dp.regf.a3 == 3 && dut.hart.ieu.dp.regf.wd3 == 1))) begin
-        $display("Code ended with ecall with gp = 1");
-        #60; // give time for instructions in pipeline to finish
-        // clear signature to prevent contamination from previous tests
-        for(i=0; i<10000; i=i+1) begin
-          sig32[i] = 'bx;
-        end
-
-        // read signature, reformat in 64 bits if necessary
-        signame = {"../../imperas-riscv-tests/work/", tests[test], ".signature.output"};
-        $readmemh(signame, sig32);
-        i = 0;
-        while (i < 10000) begin
-          if (`XLEN == 32) begin
-            signature[i] = sig32[i];
-            i = i+1;
-          end else begin
-            signature[i/2] = {sig32[i+1], sig32[i]};
-            i = i + 2;
-          end
-        end
-
-        // Check errors
-        i = 0;
-        errors = 0;
-        if (`XLEN == 32)
-          testadr = (`TIMBASE+tests[test+1].atohex())/4;
-        else
-          testadr = (`TIMBASE+tests[test+1].atohex())/8;
-        /* verilator lint_off INFINITELOOP */
-        while (signature[i] !== 'bx) begin
-          //$display("signature[%h] = %h", i, signature[i]);
-          if (signature[i] !== dut.uncore.dtim.RAM[testadr+i]) begin
-            if (signature[i+4] !== 'bx || signature[i] !== 32'hFFFFFFFF) begin
-              // report errors unless they are garbage at the end of the sim
-              // kind of hacky test for garbage right now
-              errors = errors+1;
-              $display("  Error on test %s result %d: adr = %h sim = %h, signature = %h", 
-                    tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]);
-            end
-          end
-          i = i + 1;
-        end
-        /* verilator lint_on INFINITELOOP */
-        if (errors == 0) $display("%s succeeded.  Brilliant!!!", tests[test]);
-        else begin
-          $display("%s failed with %d errors. :(", tests[test], errors);
-          totalerrors = totalerrors+1;
-        end
-        test = test + 2;
-        if (test == tests.size()) begin
-          if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
-          else $display("FAIL: %d test programs had errors", totalerrors);
-          $stop;
-        end
-        else begin
-          memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
-          $readmemh(memfilename, dut.imem.RAM);
-          $readmemh(memfilename, dut.uncore.dtim.RAM);
-          $display("Read memfile %s", memfilename);
-	  ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
-	  ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
-          reset = 1; # 17; reset = 0;
-        end
-      end
-    end // always @ (negedge clk)
-
-  // track the current function or global label
-  if (DEBUG == 1) begin : functionRadix
-    function_radix function_radix(.reset(reset),
-				  .ProgramAddrMapFile(ProgramAddrMapFile),
-				  .ProgramLabelMapFile(ProgramLabelMapFile));
-  end
-
-  // initialize the branch predictor
-  // At the start of simulation, $readmemb loads the files named by the
-  // TWO_BIT_PRELOAD and BTB_PRELOAD macros into the direction predictor's PHT
-  // memory and the target predictor's memory, reached by hierarchical path
-  // through dut.hart.ifu.bpred.
-  initial begin
-    $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.Predictor.DirPredictor.PHT.memory);
-    $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.TargetPredictor.memory.memory);    
-  end
-  
-endmodule
-
-/* verilator lint_on STMTDLY */
-/* verilator lint_on WIDTH */
-
-// Per-stage instruction name tracker, used for visualization in the testbench.
-// The instruction word of each pipeline stage (F, D, E, M, W) is passed to its
-// own instrNameDecTB instance, which produces the matching name string.
-//   clk, reset, FlushE : accepted as ports but not used inside this module
-//   InstrF..InstrW     : 32-bit instruction word per stage (InstrW is supplied
-//                        by the caller rather than registered from InstrM here)
-//   Instr*Name         : decoded name string for the corresponding stage
-module instrTrackerTB(
-  input  logic        clk, reset, FlushE,
-  input  logic [31:0] InstrF, InstrD, InstrE, InstrM, InstrW,
-  output string       InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
-
-  // one name decoder per pipeline stage
-  instrNameDecTB fdec(.instr(InstrF), .name(InstrFName));
-  instrNameDecTB ddec(.instr(InstrD), .name(InstrDName));
-  instrNameDecTB edec(.instr(InstrE), .name(InstrEName));
-  instrNameDecTB mdec(.instr(InstrM), .name(InstrMName));
-  instrNameDecTB wdec(.instr(InstrW), .name(InstrWName));
-endmodule
-
-// decode the instruction name, to help the test bench
-//
-// Maps a 32-bit RISC-V instruction word to a mnemonic string for display.
-//   instr : raw instruction word
-//   name  : mnemonic of the instruction; "ILLEGAL" when no entry matches, and
-//           "BAD" when opcode and funct3 are all zero
-//
-// Decoding is keyed on {opcode, funct3}; funct7 (or the 12-bit immediate for
-// the SYSTEM opcode) disambiguates entries that share an opcode/funct3 pair.
-module instrNameDecTB(
-  input  logic [31:0] instr,
-  output string       name);
-
-  logic [6:0] op;
-  logic [2:0] funct3;
-  logic [6:0] funct7;
-  logic [11:0] imm;
-
-  assign op = instr[6:0];
-  assign funct3 = instr[14:12];
-  assign funct7 = instr[31:25];
-  assign imm = instr[31:20];
-
-  // it would be nice to add the operands to the name 
-  // create another variable called decoded
-
-  always_comb 
-    casez({op, funct3})
-      10'b0000000_000: name = "BAD";
-      // loads
-      10'b0000011_000: name = "LB";
-      10'b0000011_001: name = "LH";
-      10'b0000011_010: name = "LW";
-      10'b0000011_011: name = "LD";
-      10'b0000011_100: name = "LBU";
-      10'b0000011_101: name = "LHU";
-      10'b0000011_110: name = "LWU";
-      // register-immediate ALU ops; ADDI with rd, rs1 and imm all zero is shown as NOP/FLUSH
-      10'b0010011_000: if (instr[31:15] == 0 && instr[11:7] ==0) name = "NOP/FLUSH";
-                       else                                      name = "ADDI";
-      // shifts compare only funct7[6:1], leaving instr[25] free (the top shamt bit on RV64)
-      10'b0010011_001: if (funct7[6:1] == 6'b000000) name = "SLLI";
-                       else                      name = "ILLEGAL";
-      10'b0010011_010: name = "SLTI";
-      10'b0010011_011: name = "SLTIU";
-      10'b0010011_100: name = "XORI";
-      10'b0010011_101: if (funct7[6:1] == 6'b000000)      name = "SRLI";
-                       else if (funct7[6:1] == 6'b010000) name = "SRAI"; 
-                       else                           name = "ILLEGAL"; 
-      10'b0010011_110: name = "ORI";
-      10'b0010011_111: name = "ANDI";
-      10'b0010111_???: name = "AUIPC";
-      // stores
-      10'b0100011_000: name = "SB";
-      10'b0100011_001: name = "SH";
-      10'b0100011_010: name = "SW";
-      10'b0100011_011: name = "SD";
-      // 32-bit register-immediate ops (RV64)
-      10'b0011011_000: name = "ADDIW";
-      // SLLIW requires funct7 == 0, matching the SRLIW/SRAIW checks below
-      10'b0011011_001: if      (funct7 == 7'b0000000) name = "SLLIW";
-                       else                           name = "ILLEGAL";
-      10'b0011011_101: if      (funct7 == 7'b0000000) name = "SRLIW";
-                       else if (funct7 == 7'b0100000) name = "SRAIW";
-                       else                           name = "ILLEGAL";
-      // 32-bit register-register ops (RV64I / RV64M)
-      10'b0111011_000: if      (funct7 == 7'b0000000) name = "ADDW";
-                       else if (funct7 == 7'b0100000) name = "SUBW";
-                       else if (funct7 == 7'b0000001) name = "MULW";
-                       else                           name = "ILLEGAL";
-      10'b0111011_001: if      (funct7 == 7'b0000000) name = "SLLW";
-                       else                           name = "ILLEGAL";
-      // DIVW is encoded with funct3 = 100 (it was previously listed under funct3 = 001)
-      10'b0111011_100: if      (funct7 == 7'b0000001) name = "DIVW";
-                       else                           name = "ILLEGAL";
-      10'b0111011_101: if      (funct7 == 7'b0000000) name = "SRLW";
-                       else if (funct7 == 7'b0100000) name = "SRAW";
-                       else if (funct7 == 7'b0000001) name = "DIVUW";
-                       else                           name = "ILLEGAL";
-      10'b0111011_110: if      (funct7 == 7'b0000001) name = "REMW";
-                       else                           name = "ILLEGAL";
-      10'b0111011_111: if      (funct7 == 7'b0000001) name = "REMUW";
-                       else                           name = "ILLEGAL";
-      // register-register ops; funct7 == 0000001 selects the multiply/divide variants
-      10'b0110011_000: if      (funct7 == 7'b0000000) name = "ADD";
-                       else if (funct7 == 7'b0000001) name = "MUL";
-                       else if (funct7 == 7'b0100000) name = "SUB"; 
-                       else                           name = "ILLEGAL"; 
-      10'b0110011_001: if      (funct7 == 7'b0000000) name = "SLL";
-                       else if (funct7 == 7'b0000001) name = "MULH";
-                       else                           name = "ILLEGAL";
-      10'b0110011_010: if      (funct7 == 7'b0000000) name = "SLT";
-                       else if (funct7 == 7'b0000001) name = "MULHSU";
-                       else                           name = "ILLEGAL";
-      10'b0110011_011: if      (funct7 == 7'b0000000) name = "SLTU";
-                       else if (funct7 == 7'b0000001) name = "MULHU";
-                       else                           name = "ILLEGAL";
-      10'b0110011_100: if      (funct7 == 7'b0000000) name = "XOR";
-                       else if (funct7 == 7'b0000001) name = "DIV";
-                       else                           name = "ILLEGAL";
-      10'b0110011_101: if      (funct7 == 7'b0000000) name = "SRL";
-                       else if (funct7 == 7'b0000001) name = "DIVU";
-                       else if (funct7 == 7'b0100000) name = "SRA";
-                       else                           name = "ILLEGAL";
-      10'b0110011_110: if      (funct7 == 7'b0000000) name = "OR";
-                       else if (funct7 == 7'b0000001) name = "REM";
-                       else                           name = "ILLEGAL";
-      10'b0110011_111: if      (funct7 == 7'b0000000) name = "AND";
-                       else if (funct7 == 7'b0000001) name = "REMU";
-                       else                           name = "ILLEGAL";
-      10'b0110111_???: name = "LUI";
-      // branches and jumps
-      10'b1100011_000: name = "BEQ";
-      10'b1100011_001: name = "BNE";
-      10'b1100011_100: name = "BLT";
-      10'b1100011_101: name = "BGE";
-      10'b1100011_110: name = "BLTU";
-      10'b1100011_111: name = "BGEU";
-      10'b1100111_000: name = "JALR";
-      10'b1101111_???: name = "JAL";
-      // SYSTEM opcode: funct3 == 000 is selected by the 12-bit immediate
-      // (258 = 12'h102, 770 = 12'h302)
-      10'b1110011_000: if      (imm == 0) name = "ECALL";
-                       else if (imm == 1) name = "EBREAK";
-                       else if (imm == 2) name = "URET";
-                       else if (imm == 258) name = "SRET";
-                       else if (imm == 770) name = "MRET";
-                       else              name = "ILLEGAL";
-      10'b1110011_001: name = "CSRRW";
-      10'b1110011_010: name = "CSRRS";
-      10'b1110011_011: name = "CSRRC";
-      10'b1110011_101: name = "CSRRWI";
-      10'b1110011_110: name = "CSRRSI";
-      10'b1110011_111: name = "CSRRCI";
-      // atomics: funct7[6:2] is the operation field; the low two bits are ignored here
-      10'b0101111_010: if      (funct7[6:2] == 5'b00010) name = "LR.W";
-                       else if (funct7[6:2] == 5'b00011) name = "SC.W";
-                       else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.W";
-                       else if (funct7[6:2] == 5'b00000) name = "AMOADD.W";
-                       else if (funct7[6:2] == 5'b00100) name = "AMOXOR.W";
-                       else if (funct7[6:2] == 5'b01100) name = "AMOAND.W";
-                       else if (funct7[6:2] == 5'b01000) name = "AMOOR.W";
-                       else if (funct7[6:2] == 5'b10000) name = "AMOMIN.W";
-                       else if (funct7[6:2] == 5'b10100) name = "AMOMAX.W";
-                       else if (funct7[6:2] == 5'b11000) name = "AMOMINU.W";
-                       else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.W";
-                       else                              name = "ILLEGAL";
-      10'b0101111_011: if      (funct7[6:2] == 5'b00010) name = "LR.D";
-                       else if (funct7[6:2] == 5'b00011) name = "SC.D";
-                       else if (funct7[6:2] == 5'b00001) name = "AMOSWAP.D";
-                       else if (funct7[6:2] == 5'b00000) name = "AMOADD.D";
-                       else if (funct7[6:2] == 5'b00100) name = "AMOXOR.D";
-                       else if (funct7[6:2] == 5'b01100) name = "AMOAND.D";
-                       else if (funct7[6:2] == 5'b01000) name = "AMOOR.D";
-                       else if (funct7[6:2] == 5'b10000) name = "AMOMIN.D";
-                       else if (funct7[6:2] == 5'b10100) name = "AMOMAX.D";
-                       else if (funct7[6:2] == 5'b11000) name = "AMOMINU.D";
-                       else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D";
-                       else                              name = "ILLEGAL";
-      10'b0001111_???: name = "FENCE";
-      default:         name = "ILLEGAL";
-    endcase
-endmodule