diff --git a/pipelined/testbench/testbench-fpga.sv b/pipelined/testbench/testbench-fpga.sv
index 5997180f4..c815d8a02 100644
--- a/pipelined/testbench/testbench-fpga.sv
+++ b/pipelined/testbench/testbench-fpga.sv
@@ -56,6 +56,7 @@ logic [3:0] dummy;
   logic             HREADYEXT, HRESPEXT;
   logic [31:0]      HADDR;
   logic [`AHBW-1:0] HWDATA;
+  logic [`XLEN/8-1:0] HWSTRB;
   logic             HWRITE;
   logic [2:0]       HSIZE;
   logic [2:0]       HBURST;
@@ -105,7 +106,7 @@ logic [3:0] dummy;
   assign UARTSin = 1;
 
   wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
-                        .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
+                        .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT,
                         .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
                         .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); 
 
@@ -162,7 +163,7 @@ logic [3:0] dummy;
   always @(negedge clk)
     begin    
       if (TEST == "coremark")
-        if (dut.core.priv.priv.ecallM) begin
+        if (dut.core.priv.priv.EcallFaultM) begin
           $display("Benchmark: coremark is done.");
           $stop;
         end
@@ -342,8 +343,13 @@ module DCacheFlushFSM
 	  localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
 	  localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
 	  localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
-	  localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN;  
-	  localparam integer lognumlines = $clog2(numlines);
+	  localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN;
+	  localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN;            
+	  localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM;
+      
+//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM;
+	  localparam integer numwords = sramlen/`XLEN;
+      localparam integer lognumlines = $clog2(numlines);
 	  localparam integer loglinebytelen = $clog2(linebytelen);
 	  localparam integer lognumways = $clog2(numways);
 	  localparam integer tagstart = lognumlines + loglinebytelen;
@@ -351,65 +357,71 @@ module DCacheFlushFSM
 
 
 	  genvar 			 index, way, cacheWord;
-	  logic [`XLEN-1:0]  CacheData [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic [`XLEN-1:0]  CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic 			 CacheValid  [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic 			 CacheDirty  [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
-      for(index = 0; index < numlines; index++) begin
-		for(way = 0; way < numways; way++) begin
-		  for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
-			copyShadow #(.tagstart(tagstart),
-						 .loglinebytelen(loglinebytelen))
-			copyShadow(.clk,
-					   .start,
-					   .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
-					   .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
-					   .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
-					   .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
-					   .index(index),
-					   .cacheWord(cacheWord),
-					   .CacheData(CacheData[way][index][cacheWord]),
-					   .CacheAdr(CacheAdr[way][index][cacheWord]),
-					   .CacheTag(CacheTag[way][index][cacheWord]),
-					   .CacheValid(CacheValid[way][index][cacheWord]),
-					   .CacheDirty(CacheDirty[way][index][cacheWord]));
-		  end
-		end
+	  logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
+      logic [sramlen-1:0] cacheline;
+	  logic [`XLEN-1:0]  CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
+	  logic 			 CacheValid  [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
+	  logic 			 CacheDirty  [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
+	  logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
+    for(index = 0; index < numlines; index++) begin
+		  for(way = 0; way < numways; way++) begin
+		    for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin
+			    copyShadow #(.tagstart(tagstart),
+					.loglinebytelen(loglinebytelen), .sramlen(sramlen))
+			    copyShadow(.clk,
+          .start,
+          .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
+          .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
+          .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
+          .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
+          .index(index),
+          .cacheWord(cacheWord),
+          .CacheData(CacheData[way][index][cacheWord]),
+          .CacheAdr(CacheAdr[way][index][cacheWord]),
+          .CacheTag(CacheTag[way][index][cacheWord]),
+          .CacheValid(CacheValid[way][index][cacheWord]),
+          .CacheDirty(CacheDirty[way][index][cacheWord]));
+        end
       end
+    end
 
-	  integer i, j, k;
+    integer i, j, k, l;
 
-	  always @(posedge clk) begin
-		if (start) begin #1
-		  #1
-			for(i = 0; i < numlines; i++) begin
-			  for(j = 0; j < numways; j++) begin
-				for(k = 0; k < numwords; k++) begin
-				  if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
-					ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
-				  end
-				end	
-			  end
-			end
-		end
-	  end
-
-	  
-	end
+    always @(posedge clk) begin
+      if (start) begin #1
+        #1
+        for(i = 0; i < numlines; i++) begin
+          for(j = 0; j < numways; j++) begin
+            for(l = 0; l < cachesramwords; l++) begin
+              if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin
+                for(k = 0; k < numwords; k++) begin
+                  //cacheline = CacheData[j][i][0];
+                  // does not work with modelsim
+                  // # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions.
+                  // see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions
+                  //ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k];
+                  ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN];
+                end
+              end
+            end
+          end
+        end
+      end
+    end  
+  end
   flop #(1) doneReg(.clk, .d(start), .q(done));
 endmodule
 
 module copyShadow
-  #(parameter tagstart, loglinebytelen)
+  #(parameter tagstart, loglinebytelen, sramlen)
   (input logic clk,
    input logic 			     start,
    input logic [`PA_BITS-1:tagstart] tag,
    input logic 			     valid, dirty,
-   input logic [`XLEN-1:0] 	     data,
+   input logic [sramlen-1:0] 	     data,
    input logic [32-1:0] 	     index,
    input logic [32-1:0] 	     cacheWord,
-   output logic [`XLEN-1:0] 	     CacheData,
+   output logic [sramlen-1:0] 	     CacheData,
    output logic [`PA_BITS-1:0] 	     CacheAdr,
    output logic [`XLEN-1:0] 	     CacheTag,
    output logic 		     CacheValid,
@@ -422,9 +434,8 @@ module copyShadow
       CacheValid = valid;
       CacheDirty = dirty;
       CacheData = data;
-      CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8));
+      CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8));
     end
   end
   
-endmodule		      
-
+endmodule