diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv
index 2ed6c75da..e2dd7eef6 100644
--- a/wally-pipelined/src/ieu/alu.sv
+++ b/wally-pipelined/src/ieu/alu.sv
@@ -26,67 +26,68 @@
 `include "wally-config.vh"
 
 module alu #(parameter WIDTH=32) (
-  input  logic [WIDTH-1:0] a, b,
+  input  logic [WIDTH-1:0] A, B,        // operands
   input  logic [2:0]       ALUControl,
   input  logic [2:0]       Funct3,
-  output logic [WIDTH-1:0] result,
-  output logic [WIDTH-1:0] sum);
+  output logic [WIDTH-1:0] Result,      // output selected by ALUFunct
+  output logic [WIDTH-1:0] Sum);        // adder output before W64 sign-extension
 
-  logic [WIDTH-1:0] condinvb, sumtrunc, shift, slt, sltu, bor;
-  logic        right; //, arith, w64;
-  logic        carry, neg;
-  logic        lt, ltu;
-  logic        overflow;
+  logic [WIDTH-1:0] CondInvB, SumTrunc, Shift, SLT, SLTU;
+  logic        Right;
+  logic        Carry, Neg;
+  logic        LT, LTU;
+  logic        Overflow;
   logic        W64, SubArith, ALUOp;
+  logic [2:0]  ALUFunct;
 
+  // Extract control signals
+  // W64 indicates RV64 W-suffix instructions acting on lower 32-bit word
+  // SubArith indicates subtraction
+  // ALUOp = 0 for address generation addition or 1 for regular ALU
   assign {W64, SubArith, ALUOp} = ALUControl;
+
   // addition
-  // *** make sure condinvb is only applied when it should be (sub, slt/sltu)
-  assign condinvb = SubArith ? ~b : b;
-  assign {carry, sum} = a + condinvb + {{(WIDTH-1){1'b0}}, SubArith};
+  assign CondInvB = SubArith ? ~B : B;
+  assign {Carry, Sum} = A + CondInvB + {{(WIDTH-1){1'b0}}, SubArith};
   
   // support W-type RV64I ADDW/SUBW/ADDIW that sign-extend 32-bit result to 64 bits
   generate
     if (WIDTH==64)
-      assign sumtrunc = W64 ? {{32{sum[31]}}, sum[31:0]} : sum;
+      assign SumTrunc = W64 ? {{32{Sum[31]}}, Sum[31:0]} : Sum;
     else
-      assign sumtrunc = sum;
+      assign SumTrunc = Sum;
   endgenerate
   
-  // shifts
- // assign arith = alucontrol[3]; // sra
- // assign w64 = alucontrol[4];
-  assign right = (Funct3[2:0] == 3'b101); // sra or srl
-  shifter sh(a, b[5:0], right, SubArith, W64, shift);
-  
-  // OR optionally passes zero when ALUControl[3] is set, supporting lui
-  // *** not needed anymore; simplify control
-  //assign bor = alucontrol[3] ? b : a|b;
+  // Shifts
+  // Right is high only for Funct3 = 101 (srl/sra); sll (Funct3 = 001) leaves it low
+  // NOTE(review): SubArith and W64 presumably select arithmetic and 32-bit shifts inside shifter - confirm
+  assign Right = (Funct3[2:0] == 3'b101); // sra or srl
+  shifter sh(A, B[5:0], Right, SubArith, W64, Shift);
   
   // condition code flags based on add/subtract output
-  assign neg  = sum[WIDTH-1];
-  // overflow occurs when the numbers being added have the same sign 
+  // Overflow occurs when the numbers being added have the same sign
   // and the result has the opposite sign
-  assign overflow = (a[WIDTH-1] ~^ condinvb[WIDTH-1]) & (a[WIDTH-1] ^ sum[WIDTH-1]);
-  assign lt = neg ^ overflow;
-  assign ltu = ~carry;
+  assign Overflow = (A[WIDTH-1] ~^ CondInvB[WIDTH-1]) & (A[WIDTH-1] ^ Sum[WIDTH-1]);
+  assign Neg  = Sum[WIDTH-1];
+  assign LT = Neg ^ Overflow;
+  assign LTU = ~Carry;
  
-  // slt
-  assign slt = {{(WIDTH-1){1'b0}}, lt};
-  assign sltu = {{(WIDTH-1){1'b0}}, ltu};
+  // SLT
+  assign SLT = {{(WIDTH-1){1'b0}}, LT};
+  assign SLTU = {{(WIDTH-1){1'b0}}, LTU};
  
+  // Select appropriate ALU Result
+  assign ALUFunct = Funct3 & {3{ALUOp}}; // Force ALUFunct to 0 (add) when ALUOp = 0
   always_comb
-    if (~ALUOp) result = sumtrunc;
-    else 
-      case (Funct3)
-        3'b000: result = sumtrunc;       // add or sub
-        3'b001: result = shift;     // sll
-        3'b010: result = slt;       // slt
-        3'b011: result = sltu;      // sltu
-        3'b100: result = a ^ b;     // xor
-        3'b101: result = shift;     // sra or srl
-        3'b110: result = a | b;     // or 
-        3'b111: result = a & b;     // and
-      endcase
+    case (ALUFunct)
+      3'b000: Result = SumTrunc;  // add or sub
+      3'b001: Result = Shift;     // sll
+      3'b010: Result = SLT;       // slt
+      3'b011: Result = SLTU;      // sltu
+      3'b100: Result = A ^ B;     // xor
+      3'b101: Result = Shift;     // sra or srl
+      3'b110: Result = A | B;     // or 
+      3'b111: Result = A & B;     // and
+    endcase
 endmodule
 
diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv
index b0645f810..83c1f918e 100644
--- a/wally-pipelined/src/ieu/datapath.sv
+++ b/wally-pipelined/src/ieu/datapath.sv
@@ -93,9 +93,7 @@ module datapath (
   assign Rs1D      = InstrD[19:15];
   assign Rs2D      = InstrD[24:20];
   assign RdD       = InstrD[11:7];
-
-  //Mux for writting floating point 
-  
+  // *** can FWriteIntW be merged with RegWriteW?
   regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D);
   extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ExtImmD);
  
@@ -103,9 +101,9 @@ module datapath (
   flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E);
   flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, RD2D, RD2E);
   flopenrc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ~StallE, ExtImmD, ExtImmE);
-  flopenrc #(5)    Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
-  flopenrc #(5)    Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
-  flopenrc #(5)    RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
+  flopenrc #(5)     Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E);
+  flopenrc #(5)     Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
+  flopenrc #(5)     RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
 	
   mux3  #(`XLEN)  faemux(RD1E, WriteDataW, ResultM, ForwardAE, ForwardedSrcAE);
   mux3  #(`XLEN)  fbemux(RD2E, WriteDataW, ResultM, ForwardBE, ForwardedSrcBE);
@@ -114,35 +112,31 @@ module datapath (
   mux2  #(`XLEN)  srcbmux(ForwardedSrcBE, ExtImmE, ALUSrcBE, SrcBE);
   alu   #(`XLEN)  alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUPreResultE, AddressE);
   comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE);
-  mux2 #(`XLEN) altresultmux(ExtImmE, PCLinkE, JumpE, AltResultE);
-  mux2 #(`XLEN) aluresultmux(ALUPreResultE, AltResultE, ALUResultSrcE, ALUResultE);
+  mux2 #(`XLEN)   altresultmux(ExtImmE, PCLinkE, JumpE, AltResultE);
+  mux2 #(`XLEN)   aluresultmux(ALUPreResultE, AltResultE, ALUResultSrcE, ALUResultE);
 
   // Memory stage pipeline register
   flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM);
   flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM);
   assign MemAdrE = AddressE;   // *** clean up this naming
   assign PCTargetE = AddressE; // *** clean up this naming
-  flopenrc #(`XLEN) AddressNReg(clk, reset, FlushM, ~StallM, MemAdrE, MemAdrM);
+  flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, MemAdrE, MemAdrM);
   flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM);
-  flopenrc #(5)    RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM);	
-  mux2  #(`XLEN)   resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM);
+  flopenrc #(5)     RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM);	
+  mux2  #(`XLEN)    resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM);
   
   // Writeback stage pipeline register and logic
   flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW);
-  flopenrc #(5)    RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW);
+  flopenrc #(5)     RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
+  flopen #(`XLEN)   ReadDataWReg(.clk, .en(~StallW), .d(ReadDataM), .q(ReadDataW));
 
   // handle Store Conditional result if atomic extension supported
   generate
     if (`A_SUPPORTED)
-      assign SCResultW = SquashSCW ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0};
+      assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW};
     else 
       assign SCResultW = 0;
   endgenerate
 
-  flopen #(`XLEN) ReadDataWReg(.clk(clk),
-			      .en(~StallW),
-			      .d(ReadDataM),
-			      .q(ReadDataW));
-
   mux5  #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW);	 
 endmodule
diff --git a/wally-pipelined/testbench/tests.vh b/wally-pipelined/testbench/tests.vh
index ec3a53157..a12f96991 100644
--- a/wally-pipelined/testbench/tests.vh
+++ b/wally-pipelined/testbench/tests.vh
@@ -586,7 +586,6 @@ string imperas32f[] = '{
 
   string arch64i[] = '{
     `RISCVARCHTEST,
-    "rv64i_m/I/beq-01", "47010",
     "rv64i_m/I/add-01", "9010",
     "rv64i_m/I/addi-01", "6010",
     "rv64i_m/I/addiw-01", "6010",