diff --git a/pipelined/src/generic/flop/simpleram.sv b/pipelined/src/generic/flop/simpleram.sv
index b08021614..08c7d113b 100644
--- a/pipelined/src/generic/flop/simpleram.sv
+++ b/pipelined/src/generic/flop/simpleram.sv
@@ -34,15 +34,15 @@ module simpleram #(parameter BASE=0, RANGE = 65535) (
   input  logic             clk, 
   input  logic [31:0]      a,
   input  logic             we,
-  input  logic [`XLEN/8-1:0] ByteMask,
-  input  logic [`XLEN-1:0] wd,
-  output logic [`XLEN-1:0] rd
+  input  logic [`LLEN/8-1:0] ByteMask,
+  input  logic [`LLEN-1:0] wd,
+  output logic [`LLEN-1:0] rd
 );
 
   localparam ADDR_WDITH = $clog2(RANGE/8);
-  localparam OFFSET = $clog2(`XLEN/8);
+  localparam OFFSET = $clog2(`LLEN/8);
 
-  bram1p1rw #(`XLEN/8, 8, ADDR_WDITH) 
+  bram1p1rw #(`LLEN/8, 8, ADDR_WDITH) 
     memory(.clk, .we, .bwe(ByteMask), .addr(a[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(rd), .din(wd));
 endmodule
 
diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv
index b8e636c5f..e699bc576 100644
--- a/pipelined/src/ifu/ifu.sv
+++ b/pipelined/src/ifu/ifu.sv
@@ -187,7 +187,7 @@ module ifu (
 
   if (`IMEM == `MEM_TIM) begin : irom // *** fix up dtim taking PA_BITS rather than XLEN, *** IEUAdr is a bad name.  Probably use a ROM rather than DTIM
     dtim irom(.clk, .reset, .CPUBusy, .LSURWM(2'b10), .IEUAdrM({{(`XLEN-32){1'b0}}, PCPF[31:0]}), .IEUAdrE(PCNextFSpill),
-              .TrapM(1'b0), .FinalWriteDataM(), .ByteMaskM('0),
+              .TrapM(1'b0), .WriteDataM(), .ByteMaskM('0),
               .ReadDataWordM({{(`XLEN-32){1'b0}}, FinalInstrRawF}), .BusStall, .LSUBusWrite(), .LSUBusRead(IFUBusRead),
               .BusCommittedM(), .DCacheStallM(ICacheStallF), .Cacheable(CacheableF),
               .DCacheCommittedM(), .DCacheMiss(ICacheMiss), .DCacheAccess(ICacheAccess));
diff --git a/pipelined/src/lsu/atomic.sv b/pipelined/src/lsu/atomic.sv
index 5a0753974..2c7259a19 100644
--- a/pipelined/src/lsu/atomic.sv
+++ b/pipelined/src/lsu/atomic.sv
@@ -34,23 +34,23 @@ module atomic (
   input logic                clk,
   input logic                reset, StallW,
   input logic [`XLEN-1:0]    ReadDataM,
-  input logic [`XLEN-1:0]    LSUWriteDataM, 
+  input logic [`XLEN-1:0]    IMWriteDataM, 
   input logic [`PA_BITS-1:0] LSUPAdrM,
   input logic [6:0]          LSUFunct7M,
   input logic [2:0]          LSUFunct3M,
   input logic [1:0]          LSUAtomicM,
   input logic [1:0]          PreLSURWM,
   input logic                IgnoreRequest,
-  output logic [`XLEN-1:0]   AMOWriteDataM,
+  output logic [`XLEN-1:0]   IMAWriteDataM,
   output logic               SquashSCW,
   output logic [1:0]         LSURWM);
 
   logic [`XLEN-1:0] AMOResult;
   logic               MemReadM;
 
-  amoalu amoalu(.srca(ReadDataM), .srcb(LSUWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), 
+  amoalu amoalu(.srca(ReadDataM), .srcb(IMWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), 
                 .result(AMOResult));
-  mux2 #(`XLEN) wdmux(LSUWriteDataM, AMOResult, LSUAtomicM[1], AMOWriteDataM);
+  mux2 #(`XLEN) wdmux(IMWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM);
   assign MemReadM = PreLSURWM[1] & ~IgnoreRequest;
   lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .LSUPAdrM,
     .SquashSCW, .LSURWM);
diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv
index 5b4969ab8..4dcbda665 100644
--- a/pipelined/src/lsu/dtim.sv
+++ b/pipelined/src/lsu/dtim.sv
@@ -36,10 +36,10 @@ module dtim(
   input logic [`XLEN-1:0]   IEUAdrM,
   input logic [`XLEN-1:0]   IEUAdrE,
   input logic               TrapM, 
-  input logic [`XLEN-1:0]   FinalWriteDataM,
-  input logic [`XLEN/8-1:0] ByteMaskM,
+  input logic [`LLEN-1:0]   WriteDataM,
+  input logic [`LLEN/8-1:0] ByteMaskM,
   input logic               Cacheable,
-  output logic [`XLEN-1:0]  ReadDataWordM,
+  output logic [`LLEN-1:0]  ReadDataWordM,
   output logic              BusStall,
   output logic              LSUBusWrite,
   output logic              LSUBusRead,
@@ -53,7 +53,7 @@ module dtim(
       .clk, .ByteMask(ByteMaskM),
       .a(CPUBusy | LSURWM[0] | reset ? IEUAdrM[31:0] : IEUAdrE[31:0]), // move mux out; this shouldn't be needed when stails are handled differently ***
       .we(LSURWM[0] & Cacheable & ~TrapM),  // have to ignore write if Trap.
-      .wd(FinalWriteDataM), .rd(ReadDataWordM));
+      .wd(WriteDataM), .rd(ReadDataWordM));
 
   // since we have a local memory the bus connections are all disabled.
   // There are no peripherals supported.
diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv
index cf44fa297..cb37e1ef3 100644
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@@ -110,9 +110,10 @@ module lsu (
   logic                     BusCommittedM, DCacheCommittedM;
   logic                     SelLSUBusWord;
   logic                     DataDAPageFaultM;
-  logic [`XLEN-1:0]         LSUWriteDataM;
+  logic [`XLEN-1:0]         IMWriteDataM, IMAWriteDataM;
+  logic [`LLEN-1:0]         IMAFWriteDataM;
   logic [`LLEN-1:0]         ReadDataM;
-  logic [(`LLEN-1)/8:0]     ByteMaskM, FinalByteMaskM;
+  logic [(`LLEN-1)/8:0]     ByteMaskM;
   
   // *** TO DO: Burst mode
 
@@ -131,7 +132,7 @@ module lsu (
       .TrapM, .DCacheStallM, .SATP_REGW, .PCF,
       .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW,
       .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,
-      .IEUAdrExtM, .PTE, .LSUWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE,
+      .IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE,
       .LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW,
       .IgnoreRequestTLB);
   end else begin
@@ -140,7 +141,7 @@ module lsu (
     assign LSUAdrE = IEUAdrE[11:0]; 
     assign PreLSUPAdrM = IEUAdrExtM;
     assign LSUFunct3M = Funct3M;  assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM;
-    assign LSUWriteDataM = WriteDataM;
+    assign IMWriteDataM = WriteDataM;
    end
 
   // CommittedM tells the CPU's privilege unit the current instruction
@@ -188,8 +189,7 @@ module lsu (
   //  Memory System
   //  Either Data Cache or Data Tightly Integrated Memory or just bus interface
   /////////////////////////////////////////////////////////////////////////////////////////////
-  logic [`XLEN-1:0]    AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
-  logic [`LLEN-1:0]    FinalWriteDataM;
+  logic [`LLEN-1:0]    LSUWriteDataM, LittleEndianWriteDataM;
   logic [`LLEN-1:0]    ReadDataWordM, LittleEndianReadDataWordM;
   logic [`LLEN-1:0]    ReadDataWordMuxM;
   logic                IgnoreRequest;
@@ -202,7 +202,7 @@ module lsu (
   if (`DMEM == `MEM_TIM) begin : dtim
     // *** directly instantiate RAM or ROM here.  Instantiate SRAM1P1RW.  
     // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
-    dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
-              .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
-              .DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM[`XLEN/8-1:0]), .Cacheable(CacheableM),
+    dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .WriteDataM(LSUWriteDataM), // dtim write-data, read-data and byte-mask ports are LLEN wide; connect the full-width signals
+              .ReadDataWordM(ReadDataWordM), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
+              .DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM), .Cacheable(CacheableM),
               .DCacheMiss, .DCacheAccess);
@@ -230,23 +230,15 @@ module lsu (
 
     mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DLSUBusBuffer[`XLEN-1:0]}),
       .s(SelUncachedAdr), .y(ReadDataWordMuxM));
-    mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
+    mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM[`XLEN-1:0]), // LSUWriteDataM is LLEN wide; this mux is XLEN wide, so slice explicitly
       .s(SelUncachedAdr), .y(LSUBusHWDATA));
-    
-    // *** Ross fix up location of mux to be here; remove from IEU datapath
-    // *** look over entire FPU write and read paths
-    // *** Why is 
     if(CACHE_ENABLED) begin : dcache
-      if (`F_SUPPORTED)
-        mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IEUWriteDataM}}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
-      else
-        assign FinalWriteDataM = IEUWriteDataM;
       cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
               .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
         .clk, .reset, .CPUBusy, .SelLSUBusWord, .RW(LSURWM), .Atomic(LSUAtomicM),
         .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), 
-        .ByteMask(FinalByteMaskM), .WordCount,
-        .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
+        .ByteMask(ByteMaskM), .WordCount,
+        .FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM),
         .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
         .IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM), 
         .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), 
@@ -266,24 +258,27 @@ module lsu (
   // Atomic operations
   /////////////////////////////////////////////////////////////////////////////////////////////
   if (`A_SUPPORTED) begin:atomic
-    atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .LSUWriteDataM, .LSUPAdrM, 
+    atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .IMWriteDataM, .LSUPAdrM, 
       .LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest, 
-      .AMOWriteDataM, .SquashSCW, .LSURWM);
+      .IMAWriteDataM, .SquashSCW, .LSURWM);
   end else begin:lrsc
-    assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign AMOWriteDataM = LSUWriteDataM;
+    assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign IMAWriteDataM = IMWriteDataM;
   end
 
+  if (`F_SUPPORTED) 
+    mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IMAWriteDataM}}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM);
+  else assign IMAFWriteDataM = IMAWriteDataM;
+  
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Subword Accesses
   /////////////////////////////////////////////////////////////////////////////////////////////
   subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
 		.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
   subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
-    .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM);
+    .LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM);
 
   // Compute byte masks
   swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM));
-  assign FinalByteMaskM = ByteMaskM;
 
   /////////////////////////////////////////////////////////////////////////////////////////////
   // MW Pipeline Register
@@ -297,10 +292,10 @@ module lsu (
   //  swap the bytes when read from big-endian memory
   /////////////////////////////////////////////////////////////////////////////////////////////
   if (`BIGENDIAN_SUPPORTED) begin:endian
-    bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
+    bigendianswap #(`LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM));
     bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
   end else begin
-    assign IEUWriteDataM = LittleEndianWriteDataM;
+    assign LSUWriteDataM = LittleEndianWriteDataM;
     assign LittleEndianReadDataWordM = ReadDataWordM;
   end
 
diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv
index 748aa3df0..a2d7c6285 100644
--- a/pipelined/src/lsu/lsuvirtmen.sv
+++ b/pipelined/src/lsu/lsuvirtmen.sv
@@ -54,7 +54,7 @@ module lsuvirtmem(
   output logic [6:0]          LSUFunct7M,
   input logic [`XLEN-1:0]     IEUAdrE,
   output logic [`XLEN-1:0]    PTE,
-  output logic [`XLEN-1:0]    LSUWriteDataM,
+  output logic [`XLEN-1:0]    IMWriteDataM,
   output logic [1:0]          PageType,
   output logic [1:0]          PreLSURWM,
   output logic [1:0]          LSUAtomicM,
@@ -112,8 +112,8 @@ module lsuvirtmem(
   mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE);
   mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, PreLSUPAdrM);
   if(`HPTW_WRITES_SUPPORTED)
-    mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, LSUWriteDataM);
-  else assign LSUWriteDataM = WriteDataM;
+    mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM);
+  else assign IMWriteDataM = WriteDataM;
   mux2 #(12) replaymux(PreLSUAdrE, IEUAdrExtM[11:0], SelReplayMemE, LSUAdrE); // replay cpu request after hptw.  *** redudant with mux in cache.
 
   // always block interrupts when using the hardware page table walker.
diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv
index d42033ef7..59546ec74 100644
--- a/pipelined/src/lsu/subwordwrite.sv
+++ b/pipelined/src/lsu/subwordwrite.sv
@@ -33,25 +33,34 @@
 module subwordwrite (
   input logic [2:0]          LSUPAdrM,
   input logic [2:0]          LSUFunct3M,
-  input logic [`XLEN-1:0]    AMOWriteDataM,
-  output logic [`XLEN-1:0]   LittleEndianWriteDataM);
+  input logic [`LLEN-1:0]    IMAFWriteDataM,           // LLEN-wide store data; its low bits are the value to replicate
+  output logic [`LLEN-1:0]   LittleEndianWriteDataM);  // store value copied into every lane of the selected access size
 
   // Replicate data for subword writes
-  if (`XLEN == 64) begin:sww
+  if (`LLEN == 128) begin:sww  // 128-bit datapath: selector is the full 3-bit funct3, so labels are 3 bits wide
+    always_comb 
+      case(LSUFunct3M[2:0])
+        3'b000:  LittleEndianWriteDataM = {16{IMAFWriteDataM[7:0]}}; // sb
+        3'b001:  LittleEndianWriteDataM = {8{IMAFWriteDataM[15:0]}}; // sh
+        3'b010:  LittleEndianWriteDataM = {4{IMAFWriteDataM[31:0]}}; // sw
+        3'b011:  LittleEndianWriteDataM = {2{IMAFWriteDataM[63:0]}}; // sd
+        default: LittleEndianWriteDataM = IMAFWriteDataM;            // sq (any other funct3 passes the full word)
+      endcase
+  end else if (`LLEN == 64) begin:sww  // 64-bit datapath: only funct3[1:0] is examined
     always_comb 
       case(LSUFunct3M[1:0])
-        2'b00:  LittleEndianWriteDataM = {8{AMOWriteDataM[7:0]}};  // sb
-        2'b01:  LittleEndianWriteDataM = {4{AMOWriteDataM[15:0]}}; // sh
-        2'b10:  LittleEndianWriteDataM = {2{AMOWriteDataM[31:0]}}; // sw
-        2'b11:  LittleEndianWriteDataM = AMOWriteDataM;            // sw
+        2'b00:  LittleEndianWriteDataM = {8{IMAFWriteDataM[7:0]}};  // sb
+        2'b01:  LittleEndianWriteDataM = {4{IMAFWriteDataM[15:0]}}; // sh
+        2'b10:  LittleEndianWriteDataM = {2{IMAFWriteDataM[31:0]}}; // sw
+        2'b11:  LittleEndianWriteDataM = IMAFWriteDataM;            // sd
       endcase
   end else begin:sww // 32-bit
     always_comb 
       case(LSUFunct3M[1:0])
-        2'b00:  LittleEndianWriteDataM = {4{AMOWriteDataM[7:0]}};  // sb
-        2'b01:  LittleEndianWriteDataM = {2{AMOWriteDataM[15:0]}}; // sh
-        2'b10:  LittleEndianWriteDataM = AMOWriteDataM;            // sw
-        default: LittleEndianWriteDataM = AMOWriteDataM; // shouldn't happen
+        2'b00:  LittleEndianWriteDataM = {4{IMAFWriteDataM[7:0]}};  // sb
+        2'b01:  LittleEndianWriteDataM = {2{IMAFWriteDataM[15:0]}}; // sh
+        2'b10:  LittleEndianWriteDataM = IMAFWriteDataM;            // sw
+        default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen
       endcase
   end
 endmodule
diff --git a/pipelined/src/mmu/adrdecs.sv b/pipelined/src/mmu/adrdecs.sv
index 0104ca578..3923c2a67 100644
--- a/pipelined/src/mmu/adrdecs.sv
+++ b/pipelined/src/mmu/adrdecs.sv
@@ -38,17 +38,17 @@ module adrdecs (
   output logic [8:0]          SelRegions
 );
 
+  localparam logic [3:0]          SUPPORTED_SIZE = (`XLEN == 64 ? 4'b1111 : 4'b0111); // size mask passed to the adrdec instances below; bit 3 is set only when XLEN is 64
  // Determine which region of physical memory (if any) is being accessed
- // *** eventually uncomment Access signals
-  adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, 4'b1111, SelRegions[7]);  
-  adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, /*1'b1*/AccessRX, Size, 4'b1111, SelRegions[6]);
-  adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, /*1'b1*/AccessRWX, Size, 4'b1111, SelRegions[5]);
+  adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[7]);  
+  adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[6]);
+  adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[5]);
 
-  adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[4]);
+  adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[4]);
   adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[3]);
   adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[2]);
   adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[1]);
-  adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, 4'b1100, SelRegions[0]); // *** PMA chapter says xlen only like CLINT
+  adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE & 4'b1100, SelRegions[0]); 
 
   assign SelRegions[8] = ~|(SelRegions[7:0]);