diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do
index a50410053..5f1b14060 100644
--- a/wally-pipelined/regression/wally-pipelined.do
+++ b/wally-pipelined/regression/wally-pipelined.do
@@ -42,7 +42,7 @@ vsim workopt
 view wave
 
 -- display input and output signals as hexidecimal values
-do ./wave-dos/ahb-waves.do
+do ./wave-dos/cache-waves.do
 
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
diff --git a/wally-pipelined/regression/wave-dos/ahb-waves.do b/wally-pipelined/regression/wave-dos/ahb-waves.do
index f043d779e..c3a38563d 100644
--- a/wally-pipelined/regression/wave-dos/ahb-waves.do
+++ b/wally-pipelined/regression/wave-dos/ahb-waves.do
@@ -19,16 +19,8 @@ add wave -divider
 add wave -hex /testbench/dut/hart/ifu/PCF
 add wave -hex /testbench/dut/hart/ifu/PCD
 add wave -hex /testbench/dut/hart/ifu/InstrD
-
 add wave /testbench/InstrDName
 add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
-add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
-add wave -divider
-add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF
-add wave /testbench/dut/hart/ifu/ic/DelayF
-add wave /testbench/dut/hart/ifu/ic/DelaySideF
-add wave /testbench/dut/hart/ifu/ic/DelayD
-add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
 add wave -divider
 
 add wave -hex /testbench/dut/hart/ifu/PCE
diff --git a/wally-pipelined/regression/wave-dos/cache-waves.do b/wally-pipelined/regression/wave-dos/cache-waves.do
new file mode 100644
index 000000000..e39d40a07
--- /dev/null
+++ b/wally-pipelined/regression/wave-dos/cache-waves.do
@@ -0,0 +1,82 @@
+add wave /testbench/clk
+add wave /testbench/reset
+add wave -divider
+# Hart-level stall and flush signals (DataStall, InstrStall, StallF-StallW, FlushD-FlushW)
+#add wave /testbench/dut/hart/ebu/IReadF
+add wave /testbench/dut/hart/DataStall
+add wave /testbench/dut/hart/InstrStall
+add wave /testbench/dut/hart/StallF
+add wave /testbench/dut/hart/StallD
+add wave /testbench/dut/hart/StallE
+add wave /testbench/dut/hart/StallM
+add wave /testbench/dut/hart/StallW
+add wave /testbench/dut/hart/FlushD
+add wave /testbench/dut/hart/FlushE
+add wave /testbench/dut/hart/FlushM
+add wave /testbench/dut/hart/FlushW
+# Next groups: ifu PCF/PCD/InstrD, then instruction cache (ifu/ic) and ic/controller signals
+add wave -divider
+add wave -hex /testbench/dut/hart/ifu/PCF
+add wave -hex /testbench/dut/hart/ifu/PCD
+add wave -hex /testbench/dut/hart/ifu/InstrD
+
+add wave /testbench/InstrDName
+add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
+add wave -hex /testbench/dut/hart/ifu/ic/controller/AlignedInstrRawD
+add wave -divider
+add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchState
+add wave -hex /testbench/dut/hart/ifu/ic/controller/FetchWordNum
+add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteEnable
+add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF
+add wave -hex /testbench/dut/hart/ifu/ic/InstrAckF
+add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWriteData
+add wave -hex /testbench/dut/hart/ifu/ic/controller/ICacheMemWritePAdr
+add wave -divider
+# PCE and InstrE from the ifu, then SrcAE/SrcBE/ALUResultE from ieu/dp
+add wave -hex /testbench/dut/hart/ifu/PCE
+add wave -hex /testbench/dut/hart/ifu/InstrE
+add wave /testbench/InstrEName
+add wave -hex /testbench/dut/hart/ieu/dp/SrcAE
+add wave -hex /testbench/dut/hart/ieu/dp/SrcBE
+add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE
+#add wave /testbench/dut/hart/ieu/dp/PCSrcE
+add wave -divider
+# PCM and InstrM from the ifu, then dtim memwrite and uncore HADDR/HWDATA
+add wave -hex /testbench/dut/hart/ifu/PCM
+add wave -hex /testbench/dut/hart/ifu/InstrM
+add wave /testbench/InstrMName
+add wave /testbench/dut/uncore/dtim/memwrite
+add wave -hex /testbench/dut/uncore/HADDR
+add wave -hex /testbench/dut/uncore/HWDATA
+add wave -divider
+# ebu signals: MemReadM/InstrReadF, BusState/NextBusState, and the H* signals
+add wave -hex /testbench/dut/hart/ebu/MemReadM
+add wave -hex /testbench/dut/hart/ebu/InstrReadF
+add wave -hex /testbench/dut/hart/ebu/BusState
+add wave -hex /testbench/dut/hart/ebu/NextBusState
+add wave -hex /testbench/dut/hart/ebu/HADDR
+add wave -hex /testbench/dut/hart/ebu/HREADY
+add wave -hex /testbench/dut/hart/ebu/HTRANS
+add wave -hex /testbench/dut/hart/ebu/HRDATA
+add wave -hex /testbench/dut/hart/ebu/HWRITE
+add wave -hex /testbench/dut/hart/ebu/HWDATA
+add wave -hex /testbench/dut/hart/ebu/CaptureDataM
+add wave -hex /testbench/dut/hart/ebu/InstrStall
+add wave -divider
+# All signals directly under uncore/dtim, shown in hex
+add wave -hex /testbench/dut/uncore/dtim/*
+add wave -divider
+# PCW and InstrW from the ifu, then RegWriteW/ResultW/RdW from ieu/dp and ReadDataW from ebu
+add wave -hex /testbench/dut/hart/ifu/PCW
+add wave -hex /testbench/dut/hart/ifu/InstrW
+add wave /testbench/InstrWName
+add wave /testbench/dut/hart/ieu/dp/RegWriteW
+add wave -hex /testbench/dut/hart/ebu/ReadDataW
+add wave -hex /testbench/dut/hart/ieu/dp/ResultW
+add wave -hex /testbench/dut/hart/ieu/dp/RdW
+add wave -divider
+# NOTE(review): this repeats the uncore/dtim/* group added earlier in this file - confirm the duplicate is intended
+add wave -hex /testbench/dut/uncore/dtim/*
+add wave -divider
+# Catch-all: recursively (-r) add every signal under /testbench in hex
+add wave -hex -r /testbench/*
diff --git a/wally-pipelined/regression/wave-dos/default-waves.do b/wally-pipelined/regression/wave-dos/default-waves.do
index 4b6456512..ef4e30c61 100644
--- a/wally-pipelined/regression/wave-dos/default-waves.do
+++ b/wally-pipelined/regression/wave-dos/default-waves.do
@@ -23,11 +23,6 @@ add wave -hex /testbench/dut/hart/ifu/PCD
 add wave -hex /testbench/dut/hart/ifu/InstrD
 add wave /testbench/InstrDName
 add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
-add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
-add wave /testbench/dut/hart/ifu/ic/DelayF
-add wave /testbench/dut/hart/ifu/ic/DelaySideF
-add wave /testbench/dut/hart/ifu/ic/DelayD
-add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
 add wave -divider
 add wave -hex /testbench/dut/hart/ifu/PCE
 add wave -hex /testbench/dut/hart/ifu/InstrE
diff --git a/wally-pipelined/src/cache/line.sv b/wally-pipelined/src/cache/line.sv
index 6fe848e5e..d90cd2065 100644
--- a/wally-pipelined/src/cache/line.sv
+++ b/wally-pipelined/src/cache/line.sv
@@ -55,8 +55,8 @@ module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter
     genvar i;
     generate
         for (i=0; i < NUMWORDS; i++) begin
-            assign DataLinesIn[i] = WriteData[NUMWORDS*i+WORDSIZE-1:NUMWORDS*i];
-            flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]);
+            assign DataLinesIn[i] = WriteData[WORDSIZE*(i+1)-1:WORDSIZE*i];
+            flopenr #(WORDSIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]);
         end
     endgenerate
 
diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv
index 90ef018b3..58a287471 100644
--- a/wally-pipelined/src/ebu/ahblite.sv
+++ b/wally-pipelined/src/ebu/ahblite.sv
@@ -41,6 +41,7 @@ module ahblite (
   input  logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram
   input  logic             InstrReadF,
   output logic [`XLEN-1:0] InstrRData,
+  output logic             InstrAckF,
   // Signals from Data Cache
   input  logic [`XLEN-1:0] MemPAdrM,
   input  logic             MemReadM, MemWriteM,
@@ -171,6 +172,7 @@ module ahblite (
   assign #1 MMUReady = (NextBusState == MMUIDLE);
 
   assign InstrRData = HRDATA;
+  assign InstrAckF = ((BusState == INSTRREAD)  && (NextBusState != INSTRREAD)) || ((BusState == INSTRREADC) && (NextBusState != INSTRREADC)); // high when BusState is an instruction-read state and NextBusState is not that same state
   assign MMUReadPTE = HRDATA;
   assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021
   assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) ||
diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv
index 9542020d2..ecd3c3661 100644
--- a/wally-pipelined/src/hazard/hazard.sv
+++ b/wally-pipelined/src/hazard/hazard.sv
@@ -53,12 +53,12 @@ module hazard(
 
   assign BranchFlushDE = BPPredWrongE | RetM | TrapM;
 
-  assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE);  
+  assign StallFCause = (CSRWritePendingDEM & ~BranchFlushDE) | ICacheStallF; // ICacheStallF is not masked by BranchFlushDE
   assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD) & ~(BranchFlushDE);    // stall in decode if instruction is a load/mul/csr dependent on previous
 //  assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD;    // stall in decode if instruction is a load/mul/csr dependent on previous
   assign StallECause = 0;
   assign StallMCause = 0; 
-  assign StallWCause = DataStall | InstrStall;
+  assign StallWCause = DataStall;
 
   // Each stage stalls if the next stage is stalled or there is a cause to stall this stage.
   assign StallF = StallD | StallFCause;
diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv
index c0f042869..df608a39f 100644
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@@ -36,6 +36,7 @@ module icache(
   input  logic [11:0]       LowerPCF,
   // Data read in from the ebu unit
   input  logic [`XLEN-1:0]  InstrInF,
+  input  logic              InstrAckF,
   // Read requested from the ebu unit
   output logic [`XLEN-1:0]  InstrPAdrF,
   output logic              InstrReadF,
@@ -77,6 +78,8 @@ module icache(
     );
 
     icachecontroller #(.LINESIZE(ICACHELINESIZE)) controller(.*);
+
+    assign FlushMem = 1'b0;
 endmodule
 
 module icachecontroller #(parameter LINESIZE = 256) (
@@ -116,6 +119,7 @@ module icachecontroller #(parameter LINESIZE = 256) (
     // Signals to/from ahblite interface
     // A read containing the requested data
     input  logic [`XLEN-1:0] InstrInF,
+    input  logic             InstrAckF,
     // The read we request from main memory
     output logic [`XLEN-1:0] InstrPAdrF,
     output logic             InstrReadF
@@ -163,22 +167,28 @@ module icachecontroller #(parameter LINESIZE = 256) (
     genvar i;
     generate
         for (i=0; i < WORDSPERLINE; i++) begin
-            flopenr #(32) flop(clk, reset, FetchState & (i == FetchWordNum), InstrInF, ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]);
+            flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), InstrInF, ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]);
         end
     endgenerate
 
+    // Enter the fetch state when the cache read is not valid and no fetch is already in progress
+    always_comb begin
+        BeginFetchState = ~ICacheMemReadValid & ~FetchState; // blocking assignment: procedural 'assign' in always_comb is deprecated
+    end
+
     // Machinery to request the correct addresses from main memory
     always_comb begin
-        assign InstrReadF = FetchState;
+        assign InstrReadF = FetchState & ~EndFetchState;
         assign LineAlignedPCPF = {UpperPCPF, LowerPCF[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}};
-        assign InstrPAdrF = LineAlignedPCPF + FetchWordNum*`XLEN;
-        assign NextFetchWordNum = FetchState ? FetchWordNum+1 : {LOGWPL+1{1'b0}}; 
+        assign InstrPAdrF = LineAlignedPCPF + FetchWordNum*(`XLEN/8);
+        assign NextFetchWordNum = FetchState ? FetchWordNum+InstrAckF : {LOGWPL+1{1'b0}}; 
     end
 
     // Write to cache memory when we have the line here
     always_comb begin
-        assign BeginFetchState = 1'b0;
-        assign EndFetchState = FetchWordNum == {1'b1, {LOGWPL{1'b0}}};
+        assign EndFetchState = FetchWordNum == {1'b1, {LOGWPL{1'b0}}} & FetchState;
+        assign ICacheMemWritePAdr = LineAlignedPCPF;
+        assign ICacheMemWriteEnable = EndFetchState;
     end
 
     // Stall the pipeline while loading a new line from memory
diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv
index c68786e50..747a2b495 100644
--- a/wally-pipelined/src/ifu/ifu.sv
+++ b/wally-pipelined/src/ifu/ifu.sv
@@ -32,6 +32,7 @@ module ifu (
   input  logic             FlushF, FlushD, FlushE, FlushM, FlushW,
   // Fetch
   input  logic [`XLEN-1:0] InstrInF,
+  input  logic             InstrAckF,
   output logic [`XLEN-1:0] PCF, 
   output logic [`XLEN-1:0] InstrPAdrF,
   output logic             InstrReadF,
diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv
index c858befd3..49214b0d1 100644
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@@ -112,7 +112,7 @@ module wallypipelinedhart (
   logic [`XLEN-1:0] InstrRData;
   logic             InstrReadF;
   logic             DataStall, InstrStall;
-  logic             InstrAckD, MemAckW;
+  logic             InstrAckF, MemAckW;
 
   logic             BPPredWrongE, BPPredWrongM;
   logic [3:0]       InstrClassM;
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 8b128b17a..37d9883ee 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -370,7 +370,8 @@ string tests32i[] = {
 
   // Track names of instructions
   instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
-                dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
+                dut.hart.ifu.ic.controller.AlignedInstrRawF,
+                dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
                 dut.hart.ifu.InstrM,  dut.hart.ifu.InstrW,
                 InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);