diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do
index 753e51958..eba2ff090 100644
--- a/wally-pipelined/regression/wave.do
+++ b/wally-pipelined/regression/wave.do
@@ -118,18 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart
 add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW
 add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW
 add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags
-add wave -noupdate -group alu -divider internals
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt
-add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags
+add wave -noupdate -expand -group alu -divider internals
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt
+add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D
 add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E
@@ -239,6 +239,7 @@ add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testben
 add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WriteDataM
 add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMBlockWriteEnableM
 add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMWordWriteEnableM
+add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/AnyCPUReqM
 add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMWayWriteEnable
 add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMWordEnable
 add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SelAdrM
@@ -258,12 +259,14 @@ add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cach
 add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[2]/CacheDataMem/StoredData}
 add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[3]/CacheDataMem/WriteEnable}
 add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/hart/lsu/dcache/CacheWays[0]/MemWay/word[3]/CacheDataMem/StoredData}
-add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/SRAMAdr
-add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockWayM
-add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockWayMaskedM
-add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockM
-add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadTag
-add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/WayHit
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/SRAMAdr
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockWayM
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockWayMaskedM
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataBlockM
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadDataWordM
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/FinalReadDataWordM
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/ReadTag
+add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/hart/lsu/dcache/WayHit
 add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimReadDataBLockWayMaskedM
 add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimReadDataBlockM
 add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimWay
@@ -390,34 +393,8 @@ add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBHit
 add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/VirtualAddress
 add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/PhysicalAddress
 add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268436996]}
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268436997]}
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268436998]}
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268436999]}
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268437000]}
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268437011]}
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268437012]}
-add wave -noupdate {/testbench/dut/uncore/dtim/RAM[268437268]}
-add wave -noupdate /testbench/dut/uncore/dtim/RAM
-add wave -noupdate /testbench/dut/uncore/dtim/A
-add wave -noupdate /testbench/dut/uncore/dtim/HWDATA
-add wave -noupdate /testbench/dut/uncore/dtim/memwrite
-add wave -noupdate /testbench/dut/uncore/dtim/risingHREADYTim
-add wave -noupdate /testbench/dut/uncore/dtim/memread
-add wave -noupdate /testbench/dut/hart/lsu/dcache/ReadDataBlockWayM
-add wave -noupdate /testbench/dut/uncore/dtim/HCLK
-add wave -noupdate /testbench/dut/hart/clk
-add wave -noupdate /testbench/DCacheFlushFSM/CacheData
-add wave -noupdate /testbench/DCacheFlushFSM/CacheAdr
-add wave -noupdate /testbench/DCacheFlushFSM/CacheData
-add wave -noupdate /testbench/DCacheFlushFSM/CacheDirty
-add wave -noupdate /testbench/DCacheFlushFSM/CacheTag
-add wave -noupdate /testbench/DCacheFlushFSM/CacheValid
-add wave -noupdate -expand -group shadowram /testbench/DCacheFlushFSM/clk
-add wave -noupdate -expand -group shadowram /testbench/DCacheFlushFSM/start
-add wave -noupdate -expand -group shadowram -color Orchid /testbench/DCacheFlushFSM/ShadowRAM
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 12} {63589 ns} 0} {{Cursor 13} {4851 ns} 0} {{Cursor 3} {58080 ns} 0}
+WaveRestoreCursors {{Cursor 12} {1053664 ns} 0} {{Cursor 13} {4851 ns} 0} {{Cursor 3} {58080 ns} 0}
 quietly wave cursor active 1
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 297
@@ -433,4 +410,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {63529 ns} {63661 ns}
+WaveRestoreZoom {1053586 ns} {1053736 ns}
diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv
index d4391cc61..64ed53670 100644
--- a/wally-pipelined/src/cache/dcache.sv
+++ b/wally-pipelined/src/cache/dcache.sv
@@ -34,11 +34,9 @@ module dcache
    input logic 		       FlushW,
 
    // cpu side
-   input logic [1:0] 	       MemRWE,
    input logic [1:0] 	       MemRWM,
    input logic [2:0] 	       Funct3M,
    input logic [6:0] 	       Funct7M,
-   input logic [1:0] 	       AtomicE, 
    input logic [1:0] 	       AtomicM,
    input logic [`XLEN-1:0]     MemAdrE, // virtual address, but we only use the lower 12 bits.
    input logic [`PA_BITS-1:0]  MemPAdrM, // physical address
@@ -301,7 +299,6 @@ module dcache
   // control path *** eventually move to own module.
 
   logic AnyCPUReqM;
-  logic AnyCPUReqE;
   logic FetchCountFlag;
   logic PreCntEn;
   logic CntEn;
@@ -333,7 +330,6 @@ module dcache
 		STATE_AMO_MISS_WRITE_WORD,
 		STATE_AMO_UPDATE,
 		STATE_AMO_WRITE,
-		STATE_SRAM_BUSY,
 		STATE_PTW_READY,
 		STATE_PTW_MISS_FETCH_WDV,
 		STATE_PTW_MISS_FETCH_DONE,
@@ -352,7 +348,6 @@ module dcache
   
 
   assign AnyCPUReqM = |MemRWM | (|AtomicM);
-  assign AnyCPUReqE = |MemRWE | (|AtomicE);  
   assign FetchCountFlag = (FetchCount == FetchCountThreshold[LOGWPL:0]);
 
   flopenr #(LOGWPL+1) 
@@ -373,17 +368,6 @@ module dcache
 	      .q({SRAMWordWriteEnableW}));
   
 
-  // fsm state regs
-/* -----\/----- EXCLUDED -----\/-----
-  flopenl #(.TYPE(statetype))
-  FSMReg(.clk(clk),
-	 .load(reset),
-	 .en(1'b1),
-	 .val(STATE_READY),
-	 .d(NextState),
-	 .q(CurrState));
- -----/\----- EXCLUDED -----/\----- */
-
   always_ff @(posedge clk, posedge reset)
     if (reset)    CurrState <= #1 STATE_READY;
     else CurrState <= #1 NextState;
@@ -409,13 +393,8 @@ module dcache
         
     case (CurrState)
       STATE_READY: begin
-	// sram busy
-	if (AnyCPUReqE & SRAMWordWriteEnableM) begin
-	  NextState = STATE_SRAM_BUSY;
-	  DCacheStall = 1'b1;
-	end
 	// TLB Miss	
-	else if(AnyCPUReqM & DTLBMissM) begin                      
+	if(AnyCPUReqM & DTLBMissM) begin                      
 	  NextState = STATE_PTW_MISS_FETCH_WDV;
 	end
 	// amo hit
@@ -434,6 +413,7 @@ module dcache
 	  DCacheStall = 1'b0;
 	  SRAMWordWriteEnableM = 1'b1;
 	  SetDirtyM = 1'b1;
+	  // NOTE(review): no SRAM-busy stall is raised here any more; presumably StoreStallD in the hazard unit now covers a memory op behind this write - confirm
 	  if(StallW) NextState = STATE_CPU_BUSY;
 	  else NextState = STATE_READY;
 	end
@@ -444,7 +424,7 @@ module dcache
 	  DCacheStall = 1'b1;
 	end
 	// fault
-	else if(|MemRWM & FaultM & ~DTLBMissM) begin
+	else if(AnyCPUReqM & FaultM & ~DTLBMissM) begin
 	  NextState = STATE_READY;
 	end
 	else NextState = STATE_READY;
@@ -512,13 +492,8 @@ module dcache
 	SRAMWordWriteEnableM = 1'b1;
 	SetDirtyM = 1'b1;
 	SelAdrM = 1'b1;
-	if (AnyCPUReqE & SRAMWordWriteEnableM) begin
-	  NextState = STATE_SRAM_BUSY;
-	  DCacheStall = 1'b1;
-	end else begin
-	  NextState = STATE_READY;
-	  DCacheStall = 1'b0;
-	end
+	NextState = STATE_READY;
+	DCacheStall = 1'b0;
       end
 
       STATE_MISS_EVICT_DIRTY: begin
@@ -543,11 +518,6 @@ module dcache
 	end
       end
 
-      STATE_SRAM_BUSY: begin
-	DCacheStall = 1'b0;
-	NextState = STATE_READY;
-      end
-
       STATE_CPU_BUSY : begin
 	if(StallW) NextState = STATE_CPU_BUSY;
 	else NextState = STATE_READY;
diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv
index f55521061..331fc3267 100644
--- a/wally-pipelined/src/hazard/hazard.sv
+++ b/wally-pipelined/src/hazard/hazard.sv
@@ -30,7 +30,7 @@ module hazard(
 	      input logic  reset,
   // Detect hazards
 	      input logic  BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
-	      input logic  LoadStallD, MulDivStallD, CSRRdStallD,
+	      input logic  LoadStallD, StoreStallD, MulDivStallD, CSRRdStallD,
 	      input logic  DCacheStall, ICacheStallF,
               input logic  FPUStallD, FStallD,
 	      input logic  DivBusyE,FDivBusyE,
@@ -56,7 +56,7 @@ module hazard(
   // If any stages are stalled, the first stage that isn't stalled must flush.
 
   assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE);
-  assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE);    // stall in decode if instruction is a load/mul/csr dependent on previous
+  assign StallDCause = (LoadStallD | StoreStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE);    // stall in decode if instruction is a load/mul/csr dependent on previous, an FPU stall, or StoreStallD (controller: memory/atomic op in D while MemRWE[0] is set); suppressed on trap, return, or branch mispredict
   assign StallECause = DivBusyE | FDivBusyE;
   assign StallMCause = 0; 
   assign StallWCause = DCacheStall | ICacheStallF;
diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv
index 09715a4b7..879767365 100644
--- a/wally-pipelined/src/ieu/controller.sv
+++ b/wally-pipelined/src/ieu/controller.sv
@@ -63,7 +63,8 @@ module controller(
   output logic [2:0] ResultSrcW,
   output logic       InstrValidW,
   // Stall during CSRs
-  output logic       CSRWritePendingDEM
+  output logic       CSRWritePendingDEM,
+  output logic       StoreStallD
 );
 
   logic [6:0] OpD;
@@ -219,5 +220,7 @@ module controller(
                          {RegWriteM, ResultSrcM, InstrValidM},
                          {RegWriteW, ResultSrcW, InstrValidW});  
 
-  assign CSRWritePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM;   
+  assign CSRWritePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM;
+
+  assign StoreStallD = MemRWE[0] & ((|MemRWD) | (|AtomicD)); // stall a memory or atomic op in D while MemRWE[0] is set in E
 endmodule
diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv
index 8cac09375..95761c36e 100644
--- a/wally-pipelined/src/ieu/ieu.sv
+++ b/wally-pipelined/src/ieu/ieu.sv
@@ -71,7 +71,8 @@ module ieu (
   input logic 		   DivDoneE,
   input logic 		   DivBusyE,
   output logic 		   CSRReadM, CSRWriteM, PrivilegedM,
-  output logic 		   CSRWritePendingDEM
+  output logic 		   CSRWritePendingDEM,
+  output logic             StoreStallD
 );
 
   logic [2:0]  ImmSrcD;
diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv
index 29190c3c6..432645f71 100644
--- a/wally-pipelined/src/lsu/lsu.sv
+++ b/wally-pipelined/src/lsu/lsu.sv
@@ -37,11 +37,9 @@ module lsu
 
    // connected to cpu (controls)
    input logic [1:0] 	       MemRWM,
-   input logic [1:0] 	       MemRWE, 
    input logic [2:0] 	       Funct3M,
    input logic [6:0] 	       Funct7M, 
    input logic [1:0] 	       AtomicM,
-   input logic [1:0] 	       AtomicE,   
    output logic 	       CommittedM, 
    output logic 	       SquashSCW,
    output logic 	       DataMisalignedM,
@@ -301,12 +299,10 @@ module lsu
 		.StallW(StallW),
 		.FlushM(FlushM),
 		.FlushW(FlushW),
-		.MemRWE(MemRWE),		 // *** add to arb
 		.MemRWM(MemRWMtoDCache),
 		.Funct3M(Funct3MtoDCache),
 		.Funct7M(Funct7M),		
 		.AtomicM(AtomicMtoDCache),
-		.AtomicE(AtomicE),	    // *** add to arb
 		.MemAdrE(MemAdrEtoDCache),  // *** add to arb
 		.MemPAdrM(MemPAdrM),
 		.WriteDataM(WriteDataM),
diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv
index 5bcd4697b..f094df60a 100644
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@@ -91,7 +91,7 @@ module wallypipelinedhart
   logic 		    DivDoneE;
   logic 		    DivBusyE;
   logic 		    RegWriteD;
-  logic 		    LoadStallD, MulDivStallD, CSRRdStallD;
+  logic 		    LoadStallD, StoreStallD, MulDivStallD, CSRRdStallD;
   logic 		    SquashSCM, SquashSCW;
   // floating point unit signals
   logic [2:0] 		    FRM_REGW;
@@ -176,11 +176,9 @@ module wallypipelinedhart
 	  .StallW(StallW),
 	  .FlushW(FlushW),
 	  // CPU interface
-	  .MemRWE(MemRWE),                  	  
 	  .MemRWM(MemRWM),                  
 	  .Funct3M(Funct3M),
 	  .Funct7M(InstrM[31:25]),
-	  .AtomicE(AtomicE),
 	  .AtomicM(AtomicM),               
 	  .CommittedM(CommittedM),          
 	  .SquashSCW(SquashSCW),