From bc9c944ba0a4a0f5840c5693d3e4997ee5428ad1 Mon Sep 17 00:00:00 2001
From: Ross Thompson <stephen.thompson.37@us.af.mil>
Date: Mon, 28 Jun 2021 17:26:11 -0500
Subject: [PATCH] Don't use this branch; the page table walker is still broken.

---
 wally-pipelined/regression/wave.do            |  28 +++-
 wally-pipelined/src/lsu/lsu.sv                |  97 +++++++------
 wally-pipelined/src/lsu/lsuArb.sv             |  12 +-
 wally-pipelined/src/mmu/pagetablewalker.sv    | 131 ++++++++++++------
 wally-pipelined/src/mmu/tlb.sv                |   2 +-
 .../src/wally/wallypipelinedhart.sv           |  17 ++-
 6 files changed, 194 insertions(+), 93 deletions(-)

diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do
index a95dc5f9..a051f071 100644
--- a/wally-pipelined/regression/wave.do
+++ b/wally-pipelined/regression/wave.do
@@ -218,9 +218,11 @@ add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/har
 add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM
 add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM
 add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF
+add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemSizeM
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESETn
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA
+add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATANext
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR
@@ -234,9 +236,12 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
 add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
+add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW
 add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/CurrState
-add wave -noupdate -expand -group lsu /testbench/dut/hart/arbiter/MemAdrM
+add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall
+add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM
 add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM
+add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW
 add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM
 add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM
 add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM
@@ -282,8 +287,25 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME
 add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP
 add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM
 add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/PRegEn
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/WalkerState
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReady
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWStall
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUReadPTE
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/CurrentPTE
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE
+add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE
+add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWTranslate
+add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWPAdr
+add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReadPTE
+add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/HPTWReady
+add wave -noupdate -expand -group {LSU ARB} -expand -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU
+add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/arbiter/SelPTW
+add wave -noupdate /testbench/dut/hart/lsu/DataStall
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {11165332 ns} 0} {{Cursor 3} {7672141 ns} 0}
+WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {11172098 ns} 0} {{Cursor 3} {7672141 ns} 0}
 quietly wave cursor active 2
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 189
@@ -299,4 +321,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {11156770 ns} {11173894 ns}
+WaveRestoreZoom {11171939 ns} {11172253 ns}
diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv
index 4689b422..dc554e52 100644
--- a/wally-pipelined/src/lsu/lsu.sv
+++ b/wally-pipelined/src/lsu/lsu.sv
@@ -29,71 +29,73 @@
 
 // *** Ross Thompson amo misalignment check?
 module lsu (
-  input  logic             clk, reset,
-  input  logic             StallM, FlushM, StallW, FlushW,
-  output logic             DataStall,
-  output logic             HPTWReady,
+  input logic 		      clk, reset,
+  input logic 		      StallM, FlushM, StallW, FlushW,
+  output logic 		      DataStall,
+  output logic 		      HPTWReady,
   // Memory Stage
 
   // connected to cpu (controls)
-  input  logic [1:0]       MemRWM,
-  input  logic [2:0]       Funct3M,
-  input  logic [1:0]       AtomicM,
-  output logic             CommittedM,    
-  output logic             SquashSCW,
-  output logic             DataMisalignedM,
+  input logic [1:0] 	      MemRWM,
+  input logic [2:0] 	      Funct3M,
+  input logic [1:0] 	      AtomicM,
+  output logic 		      CommittedM, 
+  output logic 		      SquashSCW,
+  output logic 		      DataMisalignedM,
 
   // address and write data
-  input  logic [`XLEN-1:0] MemAdrM,
-  input  logic [`XLEN-1:0] WriteDataM, 
-  output  logic [`XLEN-1:0] ReadDataW,    // from ahb
+  input logic [`XLEN-1:0]     MemAdrM,
+  input logic [`XLEN-1:0]     WriteDataM, 
+  output logic [`XLEN-1:0]    ReadDataW,
 
   // cpu privilege
-  input logic  [1:0]       PrivilegeModeW,
-  input logic              DTLBFlushM,
+  input logic [1:0] 	      PrivilegeModeW,
+  input logic 		      DTLBFlushM,
   // faults
-  input  logic             NonBusTrapM, 
-  output logic             DTLBLoadPageFaultM, DTLBStorePageFaultM,
-  output logic             LoadMisalignedFaultM, LoadAccessFaultM,
+  input logic 		      NonBusTrapM, 
+  output logic 		      DTLBLoadPageFaultM, DTLBStorePageFaultM,
+  output logic 		      LoadMisalignedFaultM, LoadAccessFaultM,
   // cpu hazard unit (trap)
-  output logic             StoreMisalignedFaultM, StoreAccessFaultM,
+  output logic 		      StoreMisalignedFaultM, StoreAccessFaultM,
 
   // connect to ahb
-  input  logic             CommitM,        // should this be generated in the abh interface?
-  output logic [`PA_BITS-1:0] MemPAdrM,    // to ahb
-  output logic             MemReadM, MemWriteM,
-  output logic [1:0]       AtomicMaskedM,
-  input  logic             MemAckW,      // from ahb
-  input  logic [`XLEN-1:0] HRDATAW,    // from ahb
+  input logic 		      CommitM, // should this be generated in the ahb interface?
+  output logic [`PA_BITS-1:0] MemPAdrM, // to ahb
+  output logic 		      MemReadM, MemWriteM,
+  output logic [1:0] 	      AtomicMaskedM,
+  input logic 		      MemAckW, // from ahb
+  input logic [`XLEN-1:0]     HRDATAW, // from ahb
+  output logic [2:0] 	      Funct3MfromLSU,
+	    output logic StallWfromLSU,
 
 
   // mmu management
 
   // page table walker
-  input logic  [`XLEN-1:0] PageTableEntryM,
-  input logic  [1:0]       PageTypeM,
-  input logic  [`XLEN-1:0] SATP_REGW,   // from csr
-  input logic              STATUS_MXR, STATUS_SUM, // from csr
-  input logic              DTLBWriteM,
-  output logic             DTLBMissM,
-  input logic              DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB.
+  input logic [`XLEN-1:0]     PageTableEntryM,
+  input logic [1:0] 	      PageTypeM,
+  input logic [`XLEN-1:0]     SATP_REGW, // from csr
+  input logic 		      STATUS_MXR, STATUS_SUM, // from csr
+  input logic 		      DTLBWriteM,
+  output logic 		      DTLBMissM,
+  input logic 		      DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB.
 
 
 
-  output logic             DTLBHitM,  // not connected 
+  output logic 		      DTLBHitM, // not connected 
   
   // PMA/PMP (inside mmu) signals
-  input  logic [31:0]      HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well.
-  input  logic [2:0]       HSIZE,
-  input  logic             HWRITE,
-  input  logic             AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage.
-  input  logic [63:0]      PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem
-  input  var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker.
+  input logic [31:0] 	      HADDR, // *** replace all of these H inputs with physical address once pma checkers have been edited to use paddr as well.
+  input logic [2:0] 	      HSIZE,
+  input logic 		      HWRITE,
+  input logic 		      AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage.
+  input logic [63:0] 	      PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're going to have to come over into ifu and dmem
+  input 		      var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker.
 
-  output  logic            PMALoadAccessFaultM, PMAStoreAccessFaultM,
-  output  logic            PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa.
+  output logic 		      PMALoadAccessFaultM, PMAStoreAccessFaultM,
+  output logic 		      PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa.
   
-  output logic             DSquashBusAccessM
+  output logic 		      DSquashBusAccessM
 //  output logic [5:0]       DHSELRegionsM
   
 );
@@ -246,13 +248,15 @@ module lsu (
 	end
       end
       STATE_FETCH: begin
-	DataStall = 1'b1;	
 	if (MemAckW & ~StallW) begin
 	  NextState = STATE_READY;
+	  DataStall = 1'b0;	
 	end else if (MemAckW & StallW) begin
 	  NextState = STATE_STALLED;
+	  DataStall = 1'b1;	
 	end else begin
 	  NextState = STATE_FETCH;
+	  DataStall = 1'b1;
 	end
       end
       STATE_STALLED: begin
@@ -268,7 +272,12 @@ module lsu (
 	NextState = STATE_READY;
       end
     endcase
-  end
+  end // always_comb
+
+  // *** For now, just pass the access size (Funct3M) and StallW straight through.
+  assign Funct3MfromLSU = Funct3M;
+  assign StallWfromLSU = StallW;
+  
 
 endmodule
 
diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv
index e1a3b996..d7e62782 100644
--- a/wally-pipelined/src/lsu/lsuArb.sv
+++ b/wally-pipelined/src/lsu/lsuArb.sv
@@ -35,6 +35,7 @@ module lsuArb
    // to page table walker.
    output logic [`XLEN-1:0] HPTWReadPTE,
    output logic 	    HPTWReady,
+   output logic 	    HPTWStall, 
 
    // from CPU
    input logic [1:0] 	    MemRWM,
@@ -42,6 +43,7 @@ module lsuArb
    input logic [1:0] 	    AtomicM,
    input logic [`XLEN-1:0]  MemAdrM,
    input logic [`XLEN-1:0]  WriteDataM,
+   input logic 		    StallW,
    // to CPU
    output logic [`XLEN-1:0] ReadDataW,
    output logic 	    CommittedM, 
@@ -56,12 +58,13 @@ module lsuArb
    output logic [1:0] 	    AtomicMtoLSU,
    output logic [`XLEN-1:0] MemAdrMtoLSU,
    output logic [`XLEN-1:0] WriteDataMtoLSU,
+   output logic 	    StallWtoLSU,
    // from LSU
    input logic 		    CommittedMfromLSU,
    input logic 		    SquashSCWfromLSU,
    input logic 		    DataMisalignedMfromLSU,
    input logic [`XLEN-1:0]  ReadDataWFromLSU,
-   input logic              HPTWReadyfromLSU,		    
+   input logic 		    HPTWReadyfromLSU, 
    input logic 		    DataStall
   
    );
@@ -124,6 +127,7 @@ module lsuArb
   assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM;
   assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM;
   assign WriteDataMtoLSU = SelPTW ? `XLEN'b0 : WriteDataM;
+  assign StallWtoLSU = SelPTW ? 1'b0 : StallW;
 
   // demux the inputs from LSU to walker or cpu's data port.
 
@@ -133,6 +137,10 @@ module lsuArb
   assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU;
   assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU;
   assign HPTWReady = HPTWReadyfromLSU;
-  assign DCacheStall = DataStall; // *** this is probably going to change.
+  // *** Need to rename DCacheStall and DataStall; the current names are unclear.
+  // I think the LSU should output LSUStall, which is then demuxed
+  // into HPTWStall and CPUDataStall (not sure about this last name).
+  assign HPTWStall = SelPTW ? DataStall : 1'b1;
+  assign DCacheStall = SelPTW ? 1'b0 : DataStall; // *** this is probably going to change.
   
 endmodule
diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv
index 21749ec5..eecd46f1 100644
--- a/wally-pipelined/src/mmu/pagetablewalker.sv
+++ b/wally-pipelined/src/mmu/pagetablewalker.sv
@@ -55,6 +55,7 @@ module pagetablewalker (
   // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU
   input  logic [`XLEN-1:0] MMUReadPTE,
   input  logic             MMUReady,
+  input  logic             HPTWStall,
 
   // *** modify to send to LSU
   output logic [`XLEN-1:0] MMUPAdr,
@@ -140,14 +141,22 @@ module pagetablewalker (
   assign PageTypeF = PageType;
   assign PageTypeM = PageType;
 
-localparam LEVEL0 = 3'h0;
-  localparam LEVEL1 = 3'h1;
+  localparam LEVEL0_WDV = 4'h0;
+  localparam LEVEL0 = 4'h8;  
+  localparam LEVEL1_WDV = 4'h1;
+  localparam LEVEL1 = 4'h9;
+  localparam LEVEL2_WDV = 4'h2;
+  localparam LEVEL2 = 4'hA;  
+  localparam LEVEL3_WDV = 4'h3;
+  localparam LEVEL3 = 4'hB;
   // space left for more levels
-  localparam LEAF = 3'h5;
-  localparam IDLE = 3'h6;
-  localparam FAULT = 3'h7;
+  localparam LEAF = 4'h5;  
+  localparam IDLE = 4'h6;
+  localparam FAULT = 4'h7;
 
-  logic [2:0] WalkerState, NextWalkerState;
+  logic [3:0] WalkerState, NextWalkerState;
+
+  logic       PRegEn;
 
   generate
     if (`XLEN == 32) begin
@@ -155,27 +164,32 @@ localparam LEVEL0 = 3'h0;
 
       flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
 
+      assign PRegEn = (WalkerState == LEVEL1 || WalkerState == LEVEL0) && ~HPTWStall;
+
       // State transition logic
       always_comb begin
         case (WalkerState)
-          IDLE:   if      (MMUTranslate)           NextWalkerState = LEVEL1;
+          IDLE:   if      (MMUTranslate)           NextWalkerState = LEVEL1_WDV;
                   else                             NextWalkerState = IDLE;
-          LEVEL1: if      (~MMUReady)              NextWalkerState = LEVEL1;
+          LEVEL1_WDV: if      (HPTWStall)          NextWalkerState = LEVEL1_WDV;
+	              else                         NextWalkerState = LEVEL1;
+	  LEVEL1: 
                   // *** <FUTURE WORK> According to the architecture, we should
                   // fault upon finding a superpage that is misaligned or has 0
                   // access bit. The following commented line of code is
                   // supposed to perform that check. However, it is untested.
-                  else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
+                  if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
                   // else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;  // *** Once the above line is properly tested, delete this line.
-                  else if (ValidPTE && ~LeafPTE)   NextWalkerState = LEVEL0;
+                  else if (ValidPTE && ~LeafPTE)   NextWalkerState = LEVEL0_WDV;
                   else                             NextWalkerState = FAULT;
-          LEVEL0: if      (~MMUReady)              NextWalkerState = LEVEL0;
-                  else if (ValidPTE && LeafPTE && ~AccessAlert)
+          LEVEL0_WDV: if      (HPTWStall)          NextWalkerState = LEVEL0_WDV;
+	              else                         NextWalkerState = LEVEL0;
+	  LEVEL0: if (ValidPTE && LeafPTE && ~AccessAlert)
                                                    NextWalkerState = LEAF;
                   else                             NextWalkerState = FAULT;
-          LEAF:   if      (MMUTranslate)           NextWalkerState = LEVEL1;
+          LEAF:   if      (MMUTranslate)           NextWalkerState = LEVEL1_WDV;
                   else                             NextWalkerState = IDLE;
-          FAULT:  if      (MMUTranslate)           NextWalkerState = LEVEL1;
+          FAULT:  if      (MMUTranslate)           NextWalkerState = LEVEL1_WDV;
                   else                             NextWalkerState = IDLE;
           // Default case should never happen, but is included for linter.
           default:                                 NextWalkerState = IDLE;
@@ -201,7 +215,7 @@ localparam LEVEL0 = 3'h0;
         WalkerLoadPageFaultM = '0;
         WalkerStorePageFaultM = '0;
         MMUStall = '1;
-
+	
         case (NextWalkerState)
           IDLE: begin
             MMUStall = '0;
@@ -209,9 +223,15 @@ localparam LEVEL0 = 3'h0;
           LEVEL1: begin
             TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
           end
+          LEVEL1_WDV: begin
+            TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
+          end
           LEVEL0: begin
             TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
           end
+          LEVEL0_WDV: begin
+            TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
+          end
           LEAF: begin
             // Keep physical address alive to prevent HADDR dropping to 0
             TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
@@ -233,9 +253,16 @@ localparam LEVEL0 = 3'h0;
         endcase
       end
 
-      // Capture page table entry from ahblite
-      flopenr #(32) ptereg(clk, reset, MMUReady, MMUReadPTE, SavedPTE);
-      mux2 #(32) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE);
+      // Capture the page table entry returned by the data cache.
+      // *** May need to delay reading this value until the next clock cycle:
+      // the clk-to-q latency of the SRAM in the data cache will be long, so
+      // using the value directly looks infeasible.  This is no different from
+      // a load delay hazard, and it will require rewriting the walker FSM.
+      // Also need a new signal to enable the save; it should be a Mealy output
+      // of the FSM: a request followed by ~stall.
+      flopenr #(32) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE);
+      //mux2 #(32) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE);
+      assign CurrentPTE = SavedPTE;
       assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10];
 
       // Assign outputs to ahblite
@@ -244,61 +271,70 @@ localparam LEVEL0 = 3'h0;
       assign MMUPAdr = TranslationPAdr[31:0];
 
     end else begin
-      localparam LEVEL2 = 3'h2;
-      localparam LEVEL3 = 3'h3;
       
       logic [8:0] VPN3, VPN2, VPN1, VPN0;
 
       logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
 
-      flopenl #(3) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
+      flopenl #(4) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
+
+      assign PRegEn = (WalkerState == LEVEL1 || WalkerState == LEVEL0 ||
+		       WalkerState == LEVEL2 || WalkerState == LEVEL3) && ~HPTWStall;
 
       always_comb begin
         case (WalkerState)
-          IDLE:   if      (MMUTranslate && SvMode == `SV48)     NextWalkerState = LEVEL3;
-                  else if (MMUTranslate && SvMode == `SV39)     NextWalkerState = LEVEL2;
+          IDLE:   if      (MMUTranslate && SvMode == `SV48)     NextWalkerState = LEVEL3_WDV;
+                  else if (MMUTranslate && SvMode == `SV39)     NextWalkerState = LEVEL2_WDV;
                   else                                          NextWalkerState = IDLE;
 
-          LEVEL3: if      (~MMUReady)                           NextWalkerState = LEVEL3;
+          LEVEL3_WDV: if      (HPTWStall)                       NextWalkerState = LEVEL3_WDV;
+	              else                                      NextWalkerState = LEVEL3;
+	  LEVEL3: 
                   // *** <FUTURE WORK> According to the architecture, we should
                   // fault upon finding a superpage that is misaligned or has 0
                   // access bit. The following commented line of code is
                   // supposed to perform that check. However, it is untested.
-                  else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF;
+                  if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF;
                   // else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;  // *** Once the above line is properly tested, delete this line.
-                  else if (ValidPTE && ~LeafPTE)                NextWalkerState = LEVEL2;
+                  else if (ValidPTE && ~LeafPTE)                NextWalkerState = LEVEL2_WDV;
                   else                                          NextWalkerState = FAULT;
 
-          LEVEL2: if      (~MMUReady)                           NextWalkerState = LEVEL2;
+          LEVEL2_WDV: if      (HPTWStall)                       NextWalkerState = LEVEL2_WDV;
+	              else                                      NextWalkerState = LEVEL2;
+	  LEVEL2:
                   // *** <FUTURE WORK> According to the architecture, we should
                   // fault upon finding a superpage that is misaligned or has 0
                   // access bit. The following commented line of code is
                   // supposed to perform that check. However, it is untested.
-                  else if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF;
+                  if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF;
                   // else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;  // *** Once the above line is properly tested, delete this line.
-                  else if (ValidPTE && ~LeafPTE)                NextWalkerState = LEVEL1;
+                  else if (ValidPTE && ~LeafPTE)                NextWalkerState = LEVEL1_WDV;
                   else                                          NextWalkerState = FAULT;
 
-          LEVEL1: if      (~MMUReady)                           NextWalkerState = LEVEL1;
+          LEVEL1_WDV: if      (HPTWStall)                       NextWalkerState = LEVEL1_WDV;
+	              else                                      NextWalkerState = LEVEL1;
+	  LEVEL1:
                   // *** <FUTURE WORK> According to the architecture, we should
                   // fault upon finding a superpage that is misaligned or has 0
                   // access bit. The following commented line of code is
                   // supposed to perform that check. However, it is untested.
-                  else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
+                  if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
                   // else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;  // *** Once the above line is properly tested, delete this line.
-                  else if (ValidPTE && ~LeafPTE)                NextWalkerState = LEVEL0;
+                  else if (ValidPTE && ~LeafPTE)                NextWalkerState = LEVEL0_WDV;
                   else                                          NextWalkerState = FAULT;
 
-          LEVEL0: if      (~MMUReady)                           NextWalkerState = LEVEL0;
-                  else if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF;
+          LEVEL0_WDV: if      (HPTWStall)                       NextWalkerState = LEVEL0_WDV;
+	              else                                      NextWalkerState = LEVEL0;
+	  LEVEL0:
+                  if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF;
                   else                                          NextWalkerState = FAULT;
                   
-          LEAF:   if      (MMUTranslate && SvMode == `SV48)     NextWalkerState = LEVEL3;
-                  else if (MMUTranslate && SvMode == `SV39)     NextWalkerState = LEVEL2;
+          LEAF:   if      (MMUTranslate && SvMode == `SV48)     NextWalkerState = LEVEL3_WDV;
+                  else if (MMUTranslate && SvMode == `SV39)     NextWalkerState = LEVEL2_WDV;
                   else                                          NextWalkerState = IDLE;
 
-          FAULT:  if      (MMUTranslate && SvMode == `SV48)     NextWalkerState = LEVEL3;
-                  else if (MMUTranslate && SvMode == `SV39)     NextWalkerState = LEVEL2;
+          FAULT:  if      (MMUTranslate && SvMode == `SV48)     NextWalkerState = LEVEL3_WDV;
+                  else if (MMUTranslate && SvMode == `SV39)     NextWalkerState = LEVEL2_WDV;
                   else                                          NextWalkerState = IDLE;
           // Default case should never happen, but is included for linter.
           default:                                              NextWalkerState = IDLE;
@@ -346,15 +382,29 @@ localparam LEVEL0 = 3'h0;
             // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off,
             // what should translationPAdr be when level3 is just off?
           end
+          LEVEL3_WDV: begin
+            TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
+            // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off,
+            // what should translationPAdr be when level3 is just off?
+          end
           LEVEL2: begin
             TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
           end
+          LEVEL2_WDV: begin
+            TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
+          end
           LEVEL1: begin
             TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
           end
+          LEVEL1_WDV: begin
+            TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
+          end
           LEVEL0: begin
             TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
           end
+          LEVEL0_WDV: begin
+            TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
+          end
           LEAF: begin
             // Keep physical address alive to prevent HADDR dropping to 0
             TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
@@ -380,8 +430,9 @@ localparam LEVEL0 = 3'h0;
       end
 
       // Capture page table entry from ahblite
-      flopenr #(`XLEN) ptereg(clk, reset, MMUReady, MMUReadPTE, SavedPTE);
-      mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE);
+      flopenr #(`XLEN) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE);
+      //mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE);
+      assign CurrentPTE = SavedPTE;
       assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10];
 
       // Assign outputs to ahblite
diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv
index 9431fc62..1cf63906 100644
--- a/wally-pipelined/src/mmu/tlb.sv
+++ b/wally-pipelined/src/mmu/tlb.sv
@@ -136,7 +136,7 @@ module tlb #(parameter ENTRY_BITS = 3,
   endgenerate
 
   // Whether translation should occur
-  assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE);
+  assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation;
 
   // Determine how the TLB is currently being used
   // Note that we use ReadAccess for both loads and instruction fetches
diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv
index 3985adae..28b8ccde 100644
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@@ -132,6 +132,7 @@ module wallypipelinedhart
   logic 		    MMUStall;
   logic 		    MMUTranslate, MMUReady;
   logic 		    HPTWReadyfromLSU;
+  logic 		    HPTWStall;
   
 
   // bus interface to dmem
@@ -171,6 +172,9 @@ module wallypipelinedhart
   logic 		    CommittedMfromLSU;
   logic 		    SquashSCWfromLSU;
   logic 		    DataMisalignedMfromLSU;
+  logic 		    StallWtoLSU;
+  logic 		    StallWfromLSU;  
+  logic [2:0] 		    Funct3MfromLSU;
   
   
   
@@ -199,11 +203,13 @@ module wallypipelinedhart
 		 .HPTWPAdr(MMUPAdr),
 		 .HPTWReadPTE(MMUReadPTE),
 		 .HPTWReady(MMUReady),
+		 .HPTWStall(HPTWStall),		 
 		 // CPU connection
 		 .MemRWM(MemRWM|FMemRWM),
 		 .Funct3M(Funct3M),
 		 .AtomicM(AtomicM),
 		 .MemAdrM(MemAdrM),
+		 .StallW(StallW),
 		 .WriteDataM(WriteDatatmpM),
 		 .ReadDataW(ReadDataW),
 		 .CommittedM(CommittedM),
@@ -216,7 +222,8 @@ module wallypipelinedhart
 		 .Funct3MtoLSU(Funct3MtoLSU),
 		 .AtomicMtoLSU(AtomicMtoLSU),
 		 .MemAdrMtoLSU(MemAdrMtoLSU),          
-		 .WriteDataMtoLSU(WriteDataMtoLSU),       
+		 .WriteDataMtoLSU(WriteDataMtoLSU),  
+		 .StallWtoLSU(StallWtoLSU),
 		 .CommittedMfromLSU(CommittedMfromLSU),     
 		 .SquashSCWfromLSU(SquashSCWfromLSU),      
 		 .DataMisalignedMfromLSU(DataMisalignedMfromLSU),
@@ -232,6 +239,7 @@ module wallypipelinedhart
 	  .MemAdrM(MemAdrMtoLSU),
 	  .WriteDataM(WriteDataMtoLSU),
 	  .ReadDataW(ReadDataWFromLSU),
+	  .StallW(StallWtoLSU),
 
 	  .CommittedM(CommittedMfromLSU),
 	  .SquashSCW(SquashSCWfromLSU),
@@ -239,16 +247,19 @@ module wallypipelinedhart
 	  .DisableTranslation(DisableTranslation),
 
 	  .DataStall(DataStall),
-	  .HPTWReady(HPTWReadyfromLSU), 
+	  .HPTWReady(HPTWReadyfromLSU),
+	  .Funct3MfromLSU(Funct3MfromLSU),
+	  .StallWfromLSU(StallWfromLSU),
 	  .* ); // data cache unit
 
   ahblite ebu( 
 	       //.InstrReadF(1'b0),
 	       //.InstrRData(InstrF), // hook up InstrF later
 	       .WriteDataM(WriteDatatmpM),
-	       .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
+	       .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]),
 	       .Funct7M(InstrM[31:25]),
 	       .HRDATAW(HRDATAW),
+	       .StallW(StallWfromLSU),
 	       .*);