From 824014c5c0788d9c4427aecf9bccf1e9f5c5ddb7 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Wed, 27 Jan 2021 22:49:47 -0500
Subject: [PATCH] Repartitioned with Instruction Fetch Unit, Integer Execution
 Unit

---
 wally-pipelined/regression/wally-pipelined.do |  19 ++--
 wally-pipelined/src/controller.sv             |  11 +-
 wally-pipelined/src/datapath.sv               |  57 ++--------
 wally-pipelined/src/extend.sv                 |  12 +--
 wally-pipelined/src/gpio.sv                   |   2 +-
 wally-pipelined/src/ieu.sv                    |  25 ++---
 wally-pipelined/src/{pclogic.sv => ifu.sv}    |  46 ++++++--
 wally-pipelined/src/instrDecompress.sv        | 100 +++++++++---------
 wally-pipelined/src/wallypipelinedhart.sv     |  37 ++-----
 .../testbench/testbench-imperas.sv            |   5 +-
 10 files changed, 140 insertions(+), 174 deletions(-)
 rename wally-pipelined/src/{pclogic.sv => ifu.sv} (69%)

diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do
index 9c510991..5c4715d0 100644
--- a/wally-pipelined/regression/wally-pipelined.do
+++ b/wally-pipelined/regression/wally-pipelined.do
@@ -45,30 +45,29 @@ view wave
 add wave /testbench/clk
 add wave /testbench/reset
 add wave -divider
-add wave -hex /testbench/dut/hart/ieu/dp/PCF
-add wave -hex /testbench/dut/hart/ieu/dp/InstrF
+add wave -hex /testbench/dut/hart/ifu/PCF
+add wave -hex /testbench/dut/hart/ifu/InstrF
 add wave /testbench/InstrFName
-#add wave -hex /testbench/dut/hart/ieu/dp/PCD
-add wave -hex /testbench/dut/hart/ieu/dp/InstrD
+#add wave -hex /testbench/dut/hart/ifu/PCD
+add wave -hex /testbench/dut/hart/ifu/InstrD
 add wave /testbench/InstrDName
 add wave -divider
-#add wave -hex /testbench/dut/hart/ieu/dp/PCE
-#add wave -hex /testbench/dut/hart/ieu/dp/InstrE
+#add wave -hex /testbench/dut/hart/ifu/PCE
+#add wave -hex /testbench/dut/hart/ifu/InstrE
 add wave /testbench/InstrEName
 add wave -hex /testbench/dut/hart/ieu/dp/SrcAE
 add wave -hex /testbench/dut/hart/ieu/dp/SrcBE
 add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE
 add wave /testbench/dut/hart/ieu/dp/PCSrcE
 add wave -divider
-#add wave -hex /testbench/dut/hart/ieu/dp/PCM
-#add wave -hex /testbench/dut/hart/ieu/dp/InstrM
+#add wave -hex /testbench/dut/hart/ifu/PCM
+#add wave -hex /testbench/dut/hart/ifu/InstrM
 add wave /testbench/InstrMName
 add wave /testbench/dut/dmem/dtim/memwrite
 add wave -hex /testbench/dut/dmem/AdrM
 add wave -hex /testbench/dut/dmem/WriteDataM
 add wave -divider
-add wave -hex /testbench/dut/hart/ieu/dp/PCW
-#add wave -hex /testbench/dut/hart/ieu/dp/InstrW
+add wave -hex /testbench/dut/hart/ifu/PCW
 add wave /testbench/InstrWName
 add wave /testbench/dut/hart/ieu/dp/RegWriteW
 add wave -hex /testbench/dut/hart/ieu/dp/ResultW
diff --git a/wally-pipelined/src/controller.sv b/wally-pipelined/src/controller.sv
index f4583762..5648db2c 100644
--- a/wally-pipelined/src/controller.sv
+++ b/wally-pipelined/src/controller.sv
@@ -34,8 +34,8 @@ module controller(
   input logic 	     Funct7b5D,
   output logic [2:0] ImmSrcD,
   input logic        StallD, FlushD, 
-  input logic        IllegalCompInstrD, 
-  output logic       IllegalIEUInstrFaultD,
+  input logic        IllegalIEUInstrFaultD, 
+  output logic       IllegalBaseInstrFaultD,
   // Execute stage control signals
   input logic 	     FlushE, 
   input logic  [2:0] FlagsE, 
@@ -74,7 +74,6 @@ module controller(
   logic       InstrValidE, InstrValidM;
   logic       PrivilegedD, PrivilegedE;
   logic [18:0] ControlsD;
-  logic        PreIllegalInstrFaultD;
   logic        aluc3D;
   logic        subD, sraD, sltD, sltuD;
   logic        BranchTakenE;
@@ -107,10 +106,11 @@ module controller(
 
   // unswizzle control bits
   // squash control signals if coming from an illegal compressed instruction
+  assign IllegalBaseInstrFaultD = ControlsD[0];
   assign {RegWriteD, ImmSrcD, ALUSrcAD, ALUSrcBD, MemRWD,
           ResultSrcD, BranchD, ALUOpD, JumpD, TargetSrcD, W64D, CSRWriteD,
-          PrivilegedD, PreIllegalInstrFaultD} = ControlsD & ~IllegalCompInstrD;
-  assign IllegalIEUInstrFaultD = PreIllegalInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
+          PrivilegedD} = ControlsD[18:1] & {18{~IllegalIEUInstrFaultD}}; // replicate to 18 bits: a bare 1-bit ~ is zero-extended before inverting and would squash only PrivilegedD
+          // *** move Privileged, CSRwrite
 
   // ALU Decoding
   assign sltD = (Funct3D == 3'b010);
@@ -133,7 +133,6 @@ module controller(
                            {RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, TargetSrcD, CSRWriteD, PrivilegedD, Funct3D, 1'b1},
                            {RegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, TargetSrcE, CSRWriteE, PrivilegedE, Funct3E, InstrValidE});
 
-
   // Branch Logic
   assign {zeroE, ltE, ltuE} = FlagsE;
   
diff --git a/wally-pipelined/src/datapath.sv b/wally-pipelined/src/datapath.sv
index 78d904df..bb9a17fe 100644
--- a/wally-pipelined/src/datapath.sv
+++ b/wally-pipelined/src/datapath.sv
@@ -28,17 +28,11 @@
 module datapath (
   input logic clk, reset,
   // Fetch stage signals
-  input  logic        StallF,
-  output logic [`XLEN-1:0] PCF,
-  input  logic [31:0] InstrF,
   // Decode stage signals
-  output logic [6:0]  OpD,
-  output logic [2:0]	Funct3D, 
-  output logic        Funct7b5D,
   input  logic        StallD, FlushD,
   input  logic [2:0]  ImmSrcD,
   input  logic        LoadStallD, // for performance counter
-  output logic        IllegalCompInstrD,
+  input  logic [31:0] InstrD,
   // Execute stage signals
   input  logic        FlushE,
   input  logic [1:0]  ForwardAE, ForwardBE,
@@ -46,51 +40,41 @@ module datapath (
   input  logic [4:0]  ALUControlE,
   input  logic        ALUSrcAE, ALUSrcBE,
   input  logic        TargetSrcE, 
+  input  logic [`XLEN-1:0] PCE,
   output logic [2:0]  FlagsE,
+  output logic [`XLEN-1:0] PCTargetE,
   // Memory stage signals
   input  logic        FlushM,
   input  logic [1:0]  MemRWM,
   input  logic [2:0]  Funct3M,
-  output logic [31:0]     InstrM,
   output logic [`XLEN-1:0] SrcAM,
-  output logic [`XLEN-1:0] PCM,
   input  logic [`XLEN-1:0] CSRReadValM,
   input  logic [`XLEN-1:0] PrivilegedNextPCM,
   output logic [`XLEN-1:0] WriteDataM, ALUResultM,
-  output logic [`XLEN-1:0] InstrMisalignedAdrM,
   input  logic [`XLEN-1:0] ReadDataM,
   output logic [7:0]  ByteMaskM,
   input  logic        RetM, TrapM,
   input  logic        DataAccessFaultM,
-  output logic InstrMisalignedFaultM,
   output logic LoadMisalignedFaultM, LoadAccessFaultM, // *** eventually move these to the memory interface, along with memdp
   output logic StoreMisalignedFaultM, StoreAccessFaultM,
   // Writeback stage signals
   input  logic        FlushW,
   input  logic        RegWriteW, 
   input  logic [1:0]  ResultSrcW,
-  input  logic        InstrValidW,
-  input  logic        FloatRegWriteW,
+  input  logic [`XLEN-1:0] PCW,
+
   // Hazard Unit signals 
   output logic [4:0]  Rs1D, Rs2D, Rs1E, Rs2E,
   output logic [4:0]  RdE, RdM, RdW);
 
-
-
   // Fetch stage signals
-  logic [`XLEN-1:0] PCPlus2or4F;
   // Decode stage signals
-  logic [31:0]     InstrD;
-  logic [`XLEN-1:0] PCD;
-//  logic [`XLEN-1:0] PCPlus2or4D;
   logic [`XLEN-1:0] RD1D, RD2D;
   logic [`XLEN-1:0] ExtImmD;
-  logic [31:0]     InstrDecompD;
   logic [4:0]      RdD;
   // Execute stage signals
-  logic [31:0]     InstrE;
   logic [`XLEN-1:0] RD1E, RD2E;
-  logic [`XLEN-1:0] PCE, ExtImmE;
+  logic [`XLEN-1:0] ExtImmE;
   logic [`XLEN-1:0] PreSrcAE, SrcAE, SrcBE;
   logic [`XLEN-1:0] ALUResultE;
   logic [`XLEN-1:0] WriteDataE;
@@ -101,37 +85,20 @@ module datapath (
   // Writeback stage signals
   logic [`XLEN-1:0] ALUResultW;
   logic [`XLEN-1:0] ReadDataW;
-  logic [`XLEN-1:0] PCW;
   logic [`XLEN-1:0] CSRValW;
   logic [`XLEN-1:0] ResultW;
 
-  logic [31:0]     nop = 32'h00000013; // instruction for NOP
-
-  // Fetch stage pipeline register and logic; also Ex stage for branches
-  pclogic pclogic(.*);
-
-  // Decode stage pipeline register and logic
-  flopenl #(32)    InstrDReg(clk, reset, ~StallD, (FlushD ? nop : InstrF), nop, InstrD);
-  flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD);
-//  flopenrc #(`XLEN) PCPlus2or4DReg(clk, reset, FlushD, ~StallD, PCPlus2or4F, PCPlus2or4D);
-   
-  instrDecompress decomp(.*);
-  assign OpD       = InstrDecompD[6:0];
-  assign Funct3D   = InstrDecompD[14:12];
-  assign Funct7b5D = InstrDecompD[30];
-  assign Rs1D      = InstrDecompD[19:15];
-  assign Rs2D      = InstrDecompD[24:20];
-  assign RdD       = InstrDecompD[11:7];
+  assign Rs1D      = InstrD[19:15];
+  assign Rs2D      = InstrD[24:20];
+  assign RdD       = InstrD[11:7];
 	
   regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, RD1D, RD2D);
-  extend ext(.InstrDecompD(InstrDecompD[31:7]), .*);
+  extend ext(.InstrD(InstrD[31:7]), .*);
  
   // Execute stage pipeline register and logic
   floprc #(`XLEN) RD1EReg(clk, reset, FlushE, RD1D, RD1E);
   floprc #(`XLEN) RD2EReg(clk, reset, FlushE, RD2D, RD2E);
-  floprc #(`XLEN) PCEReg(clk, reset, FlushE, PCD, PCE);
   floprc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ExtImmD, ExtImmE);
-  flopr  #(32)   InstrEReg(clk, reset, FlushE ? nop : InstrDecompD, InstrE);
   floprc #(5)    Rs1EReg(clk, reset, FlushE, Rs1D, Rs1E);
   floprc #(5)    Rs2EReg(clk, reset, FlushE, Rs2D, Rs2E);
   floprc #(5)    RdEReg(clk, reset, FlushE, RdD, RdE);
@@ -142,13 +109,12 @@ module datapath (
   mux2  #(`XLEN)  srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE);
   alu   #(`XLEN)  alu(SrcAE, SrcBE, ALUControlE, ALUResultE, FlagsE);
   mux2  #(`XLEN)  targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE);
+  assign  PCTargetE = ExtImmE + TargetBaseE;
 
   // Memory stage pipeline register
   floprc #(`XLEN) SrcAMReg(clk, reset, FlushM, SrcAE, SrcAM);
   floprc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ALUResultE, ALUResultM);
   floprc #(`XLEN) WriteDataMReg(clk, reset, FlushM, WriteDataE, WriteDataFullM);
-  floprc #(`XLEN) PCMReg(clk, reset, FlushM, PCE, PCM);
-  flopr  #(32)   InstrMReg(clk, reset, FlushM ? nop : InstrE, InstrM);
   floprc #(5)    RdMEg(clk, reset, FlushM, RdE, RdM);
   
   memdp memdp(.AdrM(ALUResultM), .*);
@@ -156,7 +122,6 @@ module datapath (
   // Writeback stage pipeline register and logic
   floprc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ALUResultM, ALUResultW);
   floprc #(`XLEN) ReadDataWReg(clk, reset, FlushW, ReadDataExtM, ReadDataW);
-  floprc #(`XLEN) PCWReg(clk, reset, FlushW, PCM, PCW);
   floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRValW);
   floprc #(5)    RdWEg(clk, reset, FlushW, RdM, RdW);
 
diff --git a/wally-pipelined/src/extend.sv b/wally-pipelined/src/extend.sv
index 78a3d973..4043d6bc 100644
--- a/wally-pipelined/src/extend.sv
+++ b/wally-pipelined/src/extend.sv
@@ -26,22 +26,22 @@
 `include "wally-config.vh"
 
 module extend (
-  input  logic [31:7]     InstrDecompD,
+  input  logic [31:7]     InstrD,
   input  logic [2:0]      ImmSrcD,
   output logic [`XLEN-1:0] ExtImmD);
  
   always_comb
     case(ImmSrcD) 
                // I-type 
-      3'b000:   ExtImmD = {{(`XLEN-12){InstrDecompD[31]}}, InstrDecompD[31:20]};  
+      3'b000:   ExtImmD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:20]};  
                // S-type (stores)
-      3'b001:   ExtImmD = {{(`XLEN-12){InstrDecompD[31]}}, InstrDecompD[31:25], InstrDecompD[11:7]}; 
+      3'b001:   ExtImmD = {{(`XLEN-12){InstrD[31]}}, InstrD[31:25], InstrD[11:7]}; 
                // B-type (branches)
-      3'b010:   ExtImmD = {{(`XLEN-12){InstrDecompD[31]}}, InstrDecompD[7], InstrDecompD[30:25], InstrDecompD[11:8], 1'b0}; 
+      3'b010:   ExtImmD = {{(`XLEN-12){InstrD[31]}}, InstrD[7], InstrD[30:25], InstrD[11:8], 1'b0}; 
                // J-type (jal)
-      3'b011:   ExtImmD = {{(`XLEN-20){InstrDecompD[31]}}, InstrDecompD[19:12], InstrDecompD[20], InstrDecompD[30:21], 1'b0}; 
+      3'b011:   ExtImmD = {{(`XLEN-20){InstrD[31]}}, InstrD[19:12], InstrD[20], InstrD[30:21], 1'b0}; 
                // U-type (lui, auipc)
-      3'b100:  ExtImmD = {{(`XLEN-31){InstrDecompD[31]}}, InstrDecompD[30:12], 12'b0}; 
+      3'b100:  ExtImmD = {{(`XLEN-31){InstrD[31]}}, InstrD[30:12], 12'b0}; 
       /* verilator lint_off WIDTH */
       default: ExtImmD = 'bx; // undefined
       /* verilator lint_on WIDTH */
diff --git a/wally-pipelined/src/gpio.sv b/wally-pipelined/src/gpio.sv
index 00bbde09..d2a02770 100644
--- a/wally-pipelined/src/gpio.sv
+++ b/wally-pipelined/src/gpio.sv
@@ -77,7 +77,7 @@ module gpio (
         if (reset) begin
           INPUT_EN <= 0;
           OUTPUT_EN <= 0;
-          // OUTPUT_VAL <= 0; // spec indicates synchronous rset (software control)
+          OUTPUT_VAL <= 0; // spec indicates synchronous reset (software control)
         end else if (memwrite) begin
           if (entry == 8'h00) INPUT_EN <= MaskedWriteDataM[63:32];
           if (entry == 8'h08) {OUTPUT_VAL, OUTPUT_EN} <= MaskedWriteDataM;
diff --git a/wally-pipelined/src/ieu.sv b/wally-pipelined/src/ieu.sv
index 8796a0e3..163e38dd 100644
--- a/wally-pipelined/src/ieu.sv
+++ b/wally-pipelined/src/ieu.sv
@@ -27,44 +27,36 @@
 
 module ieu (
   input  logic            clk, reset,
-  output logic [`XLEN-1:0] PCF,
-  input  logic [31:0]     InstrF,
   output logic [1:0]      MemRWM,
   output logic [7:0]      ByteMaskM,
   output logic [`XLEN-1:0] ALUResultM, WriteDataM,
   input  logic [`XLEN-1:0] ReadDataM,
   input  logic            DataAccessFaultM,
   input  logic [1:0]      ForwardAE, ForwardBE,
-  input  logic            StallF, StallD, FlushD, FlushE, FlushM, FlushW,
+  input  logic            StallD, FlushD, FlushE, FlushM, FlushW,
   output logic        PCSrcE,
   output logic        RegWriteM,
   output logic 	     MemReadE,
   output logic        RegWriteW,
   output logic        CSRWriteM, PrivilegedM,
   output logic        CSRWritePendingDEM,
-  output logic [31:0]     InstrM,
   output logic [`XLEN-1:0] SrcAM,
-  output logic [`XLEN-1:0] PCM,
+  output logic [`XLEN-1:0] PCTargetE,
+  input  logic [31:0] InstrD,
+  input  logic [`XLEN-1:0] PCE, PCW,
   input  logic [`XLEN-1:0] CSRReadValM,
-  input   logic [`XLEN-1:0] PrivilegedNextPCM, // *** eentually move to ifu
-  output logic [`XLEN-1:0] InstrMisalignedAdrM,
-  output logic InstrMisalignedFaultM,
+  input   logic [`XLEN-1:0] PrivilegedNextPCM, // *** eventually move to ifu
   output logic LoadMisalignedFaultM, LoadAccessFaultM, // *** eventually move these to the memory interface, along with memdp
   output logic StoreMisalignedFaultM, StoreAccessFaultM,
-  output  logic        IllegalIEUInstrFaultD,
+  input logic        IllegalIEUInstrFaultD, 
+  output logic       IllegalBaseInstrFaultD,
   output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
-  output logic       FloatRegWriteW,
   output logic       InstrValidW,
   input  logic        RetM, TrapM,
   input logic        LoadStallD
-
 );
 
-  logic [2:0]  Funct3D;
-  logic        Funct7b5D;
-  logic [6:0]  OpD;
   logic [2:0]  ImmSrcD;
-  logic        IllegalCompInstrD;
   logic [2:0]  FlagsE;
   logic [4:0]  ALUControlE;
   logic        ALUSrcAE, ALUSrcBE;
@@ -74,6 +66,7 @@ module ieu (
 
   logic       TargetSrcE;
            
-  controller c(.*);
+  controller c(.OpD(InstrD[6:0]), .Funct3D(InstrD[14:12]), .Funct7b5D(InstrD[30]), .*);
   datapath   dp(.*);             
 endmodule
+
diff --git a/wally-pipelined/src/pclogic.sv b/wally-pipelined/src/ifu.sv
similarity index 69%
rename from wally-pipelined/src/pclogic.sv
rename to wally-pipelined/src/ifu.sv
index 9f550655..c8c5d849 100644
--- a/wally-pipelined/src/pclogic.sv
+++ b/wally-pipelined/src/ifu.sv
@@ -1,10 +1,11 @@
 ///////////////////////////////////////////
-// pclogic.sv
+// ifu.sv
 //
 // Written: David_Harris@hmc.edu 9 January 2021
 // Modified: 
 //
-// Purpose: Determine Program Counter considering branches, exceptions, ret, reset
+// Purpose: Instruction Fetch Unit
+//           PC, branch prediction, instruction cache
 // 
 // A component of the Wally configurable RISC-V project.
 // 
@@ -25,32 +26,42 @@
 
 `include "wally-config.vh"
 
-module pclogic (
+module ifu (
   input  logic            clk, reset,
-  input  logic            StallF, PCSrcE, 
+  input  logic            StallF, StallD, FlushD, FlushE, FlushM, FlushW,
+  input  logic            PCSrcE, 
   input  logic [31:0]     InstrF,
-  input  logic [`XLEN-1:0] ExtImmE, TargetBaseE,
+  input  logic [`XLEN-1:0] PCTargetE,
   input  logic            RetM, TrapM, 
   input  logic [`XLEN-1:0] PrivilegedNextPCM, 
-  output logic [`XLEN-1:0] PCF, PCPlus2or4F,
+  output logic [31:0]     InstrD, InstrM,
+  output logic [`XLEN-1:0] PCF, PCE, PCM, PCW, 
+  input  logic            IllegalBaseInstrFaultD,
+  output logic            IllegalIEUInstrFaultD,
   output logic            InstrMisalignedFaultM,
-  output logic [`XLEN-1:0] InstrMisalignedAdrM);
+  output logic [`XLEN-1:0] InstrMisalignedAdrM
+);
 
-  logic [`XLEN-1:0] UnalignedPCNextF, PCNextF, PCTargetE;
+  logic [`XLEN-1:0] UnalignedPCNextF, PCNextF;
   logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
   logic StallExceptResolveBranchesF, PrivilegedChangePCM;
-  logic [`XLEN-3:0] PCPlusUpperF;
+  logic IllegalCompInstrD;
+  logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD;
   logic        CompressedF;
+  logic [31:0]     InstrRawD, InstrE;
+  logic [31:0]     nop = 32'h00000013; // instruction for NOP
+
+  // *** put memory interface on here, InstrF becomes output
 
   assign PrivilegedChangePCM = RetM | TrapM;
 
   assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM);
 
-  assign  PCTargetE = ExtImmE + TargetBaseE;
   mux3    #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF);
   assign  PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
   flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF);
 
+
   // pcadder
   // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
   assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction?
@@ -63,6 +74,15 @@ module pclogic (
       else        PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10};
     else          PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4
 
+
+  // Decode stage pipeline register and logic
+  flopenl #(32)    InstrDReg(clk, reset, ~StallD, (FlushD ? nop : InstrF), nop, InstrRawD);
+  flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD);
+   
+  instrDecompress decomp(.*);
+  assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
+  // *** combine these with others in better way, including M, F
+
   // Misaligned PC logic
 
   generate
@@ -79,5 +99,11 @@ module pclogic (
   assign TrapMisalignedFaultM = misaligned & PrivilegedChangePCM;
   assign InstrMisalignedFaultM = BranchMisalignedFaultM; // | TrapMisalignedFaultM; *** put this back in without causing a cyclic path
   
+  flopr  #(32)   InstrEReg(clk, reset, FlushE ? nop : InstrD, InstrE);
+  flopr  #(32)   InstrMReg(clk, reset, FlushM ? nop : InstrE, InstrM);
+  floprc #(`XLEN) PCEReg(clk, reset, FlushE, PCD, PCE);
+  floprc #(`XLEN) PCMReg(clk, reset, FlushM, PCE, PCM);
+  floprc #(`XLEN) PCWReg(clk, reset, FlushW, PCM, PCW);
+
 endmodule
 
diff --git a/wally-pipelined/src/instrDecompress.sv b/wally-pipelined/src/instrDecompress.sv
index 32fa4cd1..c2e4c7bc 100644
--- a/wally-pipelined/src/instrDecompress.sv
+++ b/wally-pipelined/src/instrDecompress.sv
@@ -26,8 +26,8 @@
 `include "wally-config.vh"
 
 module instrDecompress (
-  input  logic [31:0]     InstrD,
-  output logic [31:0]     InstrDecompD,
+  input  logic [31:0]     InstrRawD,
+  output logic [31:0]     InstrD,
   output logic            IllegalCompInstrD);
                         
   logic [15:0] instr16;
@@ -40,10 +40,10 @@ module instrDecompress (
   // if the system handles compressed instructions, decode appropriately
   generate
     if (!(`C_SUPPORTED)) begin // no compressed mode
-      assign InstrDecompD = InstrD;
+      assign InstrD = InstrRawD;
       assign IllegalCompInstrD = 0;
     end else begin // COMPRESSED mode supported
-      assign instr16 = InstrD[15:0]; // instruction is alreay aligned
+      assign instr16 = InstrRawD[15:0]; // instruction is already aligned
       assign op = instr16[1:0];
       assign rds1 = instr16[11:7];
       assign rs2 = instr16[6:2];
@@ -77,98 +77,98 @@ module instrDecompress (
    
       always_comb
         if (op == 2'b11) begin // noncompressed instruction
-          InstrDecompD = InstrD; 
+          InstrD = InstrRawD; 
           IllegalCompInstrD = 0;
         end else begin  // convert compressed instruction into uncompressed
           IllegalCompInstrD = 0;
           case ({op, instr16[15:13]})
-            5'b00000: if (immCIW != 0) InstrDecompD = {immCIW, 5'b00010, 3'b000, rdp, 7'b0010011}; // c.addi4spn
+            5'b00000: if (immCIW != 0) InstrD = {immCIW, 5'b00010, 3'b000, rdp, 7'b0010011}; // c.addi4spn
                       else begin // illegal instruction
                         IllegalCompInstrD = 1;
-                        InstrDecompD = {16'b0, instr16}; // preserve instruction for mtval on trap
+                        InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap
                       end
-            5'b00001: InstrDecompD = {immCLD, rs1p, 3'b011, rdp, 7'b0000111}; // c.fld
-            5'b00010: InstrDecompD = {immCL, rs1p, 3'b010, rdp, 7'b0000011}; // c.lw
+            5'b00001: InstrD = {immCLD, rs1p, 3'b011, rdp, 7'b0000111}; // c.fld
+            5'b00010: InstrD = {immCL, rs1p, 3'b010, rdp, 7'b0000011}; // c.lw
             5'b00011: if (`XLEN==32)
-                        InstrDecompD = {immCL, rs1p, 3'b010, rdp, 7'b0000111}; // c.flw
+                        InstrD = {immCL, rs1p, 3'b010, rdp, 7'b0000111}; // c.flw
                       else
-                        InstrDecompD = {immCLD, rs1p, 3'b011, rdp, 7'b0000011}; // c.ld;
-            5'b00101: InstrDecompD = {immCSD[11:5], rs2p, rs1p, 3'b011, immCSD[4:0], 7'b0100111}; // c.fsd
-            5'b00110: InstrDecompD = {immCS[11:5], rs2p, rs1p, 3'b010, immCS[4:0], 7'b0100011}; // c.sw
+                        InstrD = {immCLD, rs1p, 3'b011, rdp, 7'b0000011}; // c.ld;
+            5'b00101: InstrD = {immCSD[11:5], rs2p, rs1p, 3'b011, immCSD[4:0], 7'b0100111}; // c.fsd
+            5'b00110: InstrD = {immCS[11:5], rs2p, rs1p, 3'b010, immCS[4:0], 7'b0100011}; // c.sw
             5'b00111: if (`XLEN==32)
-                        InstrDecompD = {immCS[11:5], rs2p, rs1p, 3'b010, immCS[4:0], 7'b0100111}; // c.fsw
+                        InstrD = {immCS[11:5], rs2p, rs1p, 3'b010, immCS[4:0], 7'b0100111}; // c.fsw
                       else
-                        InstrDecompD = {immCSD[11:5], rs2p, rs1p, 3'b011, immCSD[4:0], 7'b0100011}; //c.sd
-            5'b01000: InstrDecompD = {immCI, rds1, 3'b000, rds1, 7'b0010011}; // c.addi
+                        InstrD = {immCSD[11:5], rs2p, rs1p, 3'b011, immCSD[4:0], 7'b0100011}; //c.sd
+            5'b01000: InstrD = {immCI, rds1, 3'b000, rds1, 7'b0010011}; // c.addi
             5'b01001: if (`XLEN==32) 
-                        InstrDecompD = {immCJ, 5'b00001, 7'b1101111}; // c.jal
+                        InstrD = {immCJ, 5'b00001, 7'b1101111}; // c.jal
                       else
-                        InstrDecompD = {immCI, rds1, 3'b000, rds1, 7'b0011011}; // c.addiw
-            5'b01010: InstrDecompD = {immCI, 5'b00000, 3'b000, rds1, 7'b0010011}; // c.li
+                        InstrD = {immCI, rds1, 3'b000, rds1, 7'b0011011}; // c.addiw
+            5'b01010: InstrD = {immCI, 5'b00000, 3'b000, rds1, 7'b0010011}; // c.li
             5'b01011: if (rds1 != 5'b00010)
-                       InstrDecompD = {immCILUI, rds1, 7'b0110111}; // c.lui
+                       InstrD = {immCILUI, rds1, 7'b0110111}; // c.lui
                       else 
-                       InstrDecompD = {immCIASP, rds1, 3'b000, rds1, 7'b0010011}; // c.addi16sp
+                       InstrD = {immCIASP, rds1, 3'b000, rds1, 7'b0010011}; // c.addi16sp
             5'b01100: if (instr16[11:10] == 2'b00)
-                        InstrDecompD = {6'b000000, immSH, rds1p, 3'b101, rds1p, 7'b0010011}; // c.srli
+                        InstrD = {6'b000000, immSH, rds1p, 3'b101, rds1p, 7'b0010011}; // c.srli
                       else if (instr16[11:10] == 2'b01)
-                        InstrDecompD = {6'b010000, immSH, rds1p, 3'b101, rds1p, 7'b0010011}; // c.srai
+                        InstrD = {6'b010000, immSH, rds1p, 3'b101, rds1p, 7'b0010011}; // c.srai
                       else if (instr16[11:10] == 2'b10) 
-                        InstrDecompD = {immCI, rds1p, 3'b111, rds1p, 7'b0010011}; // c.andi
+                        InstrD = {immCI, rds1p, 3'b111, rds1p, 7'b0010011}; // c.andi
                       else if (instr16[12:10] == 3'b011)
                         if (instr16[6:5] == 2'b00) 
-                          InstrDecompD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0110011}; // c.sub
+                          InstrD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0110011}; // c.sub
                         else if (instr16[6:5] == 2'b01) 
-                          InstrDecompD = {7'b0000000, rs2p, rds1p, 3'b100, rds1p, 7'b0110011}; // c.xor
+                          InstrD = {7'b0000000, rs2p, rds1p, 3'b100, rds1p, 7'b0110011}; // c.xor
                         else if (instr16[6:5] == 2'b10) 
-                          InstrDecompD = {7'b0000000, rs2p, rds1p, 3'b110, rds1p, 7'b0110011}; // c.or
+                          InstrD = {7'b0000000, rs2p, rds1p, 3'b110, rds1p, 7'b0110011}; // c.or
                         else // if (instr16[6:5] == 2'b11) 
-                          InstrDecompD = {7'b0000000, rs2p, rds1p, 3'b111, rds1p, 7'b0110011}; // c.and
+                          InstrD = {7'b0000000, rs2p, rds1p, 3'b111, rds1p, 7'b0110011}; // c.and
                       else if (instr16[12:10] == 3'b111 && `XLEN > 32)
                         if (instr16[6:5] == 2'b00)
-                          InstrDecompD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.subw
+                          InstrD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.subw
                         else if (instr16[6:5] == 2'b01)
-                          InstrDecompD = {7'b0000000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.addw
+                          InstrD = {7'b0000000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.addw
                         else begin // reserved  
                           IllegalCompInstrD = 1;
-                          InstrDecompD = {16'b0, instr16}; // preserve instruction for mtval on trap
+                          InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap
                         end
                       else begin // illegal instruction
                         IllegalCompInstrD = 1;
-                        InstrDecompD = {16'b0, instr16}; // preserve instruction for mtval on trap
+                        InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap
                       end
-            5'b01101: InstrDecompD = {immCJ, 5'b00000, 7'b1101111}; // c.j
-            5'b01110: InstrDecompD = {immCB[11:5], 5'b00000, rs1p, 3'b000, immCB[4:0], 7'b1100011}; // c.beqz
-            5'b01111: InstrDecompD = {immCB[11:5], 5'b00000, rs1p, 3'b001, immCB[4:0], 7'b1100011}; // c.bnez
-            5'b10000: InstrDecompD = {6'b000000, immSH, rds1, 3'b001, rds1, 7'b0010011}; // c.slli
-            5'b10001: InstrDecompD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000111}; // c.fldsp
-            5'b10010: InstrDecompD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000011}; // c.lwsp
+            5'b01101: InstrD = {immCJ, 5'b00000, 7'b1101111}; // c.j
+            5'b01110: InstrD = {immCB[11:5], 5'b00000, rs1p, 3'b000, immCB[4:0], 7'b1100011}; // c.beqz
+            5'b01111: InstrD = {immCB[11:5], 5'b00000, rs1p, 3'b001, immCB[4:0], 7'b1100011}; // c.bnez
+            5'b10000: InstrD = {6'b000000, immSH, rds1, 3'b001, rds1, 7'b0010011}; // c.slli
+            5'b10001: InstrD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000111}; // c.fldsp
+            5'b10010: InstrD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000011}; // c.lwsp
             5'b10011: if (`XLEN == 32)
-                        InstrDecompD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000111}; // c.flwsp
+                        InstrD = {immCILSP, 5'b00010, 3'b010, rds1, 7'b0000111}; // c.flwsp
                       else 
-                        InstrDecompD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000011}; // c.ldsp
+                        InstrD = {immCILSPD, 5'b00010, 3'b011, rds1, 7'b0000011}; // c.ldsp
             5'b10100: if (instr16[12] == 0)
                         if (instr16[6:2] == 5'b00000) 
-                          InstrDecompD = {7'b0000000, 5'b00000, rds1, 3'b000, 5'b00001, 7'b1100111}; // c.jalr
+                          InstrD = {7'b0000000, 5'b00000, rds1, 3'b000, 5'b00001, 7'b1100111}; // c.jalr
                         else
-                          InstrDecompD = {7'b0000000, rs2, 5'b00000, 3'b000, rds1, 7'b0110011}; // c.mv
+                          InstrD = {7'b0000000, rs2, 5'b00000, 3'b000, rds1, 7'b0110011}; // c.mv
                       else
                         if (rs2 == 5'b00000)
                           if (rds1 == 5'b00000) 
-                            InstrDecompD = {12'b1, 5'b00000, 3'b000, 5'b00000, 7'b1110011}; // c.ebreak
+                            InstrD = {12'b1, 5'b00000, 3'b000, 5'b00000, 7'b1110011}; // c.ebreak
                           else
-                            InstrDecompD = {12'b0, rds1, 3'b000, 5'b00001, 7'b1100111}; // c.jalr
+                            InstrD = {12'b0, rds1, 3'b000, 5'b00001, 7'b1100111}; // c.jalr
                         else
-                          InstrDecompD = {7'b0000000, rs2, rds1, 3'b000, rds1, 7'b0110011}; // c.add
-            5'b10101: InstrDecompD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100111}; // c.fsdsp
-            5'b10110: InstrDecompD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100011}; // c.swsp
+                          InstrD = {7'b0000000, rs2, rds1, 3'b000, rds1, 7'b0110011}; // c.add
+            5'b10101: InstrD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100111}; // c.fsdsp
+            5'b10110: InstrD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100011}; // c.swsp
             5'b10111: if (`XLEN==32)
-                        InstrDecompD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100111}; // c.fswsp
+                        InstrD = {immCSS[11:5], rs2, 5'b00010, 3'b010, immCSS[4:0], 7'b0100111}; // c.fswsp
                       else
-                        InstrDecompD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100011}; // c.sdsp
+                        InstrD = {immCSSD[11:5], rs2, 5'b00010, 3'b011, immCSSD[4:0], 7'b0100011}; // c.sdsp
             default: begin // illegal instruction
                         IllegalCompInstrD = 1;
-                        InstrDecompD = {16'b0, instr16}; // preserve instruction for mtval on trap
+                        InstrD = {16'b0, instr16}; // preserve instruction for mtval on trap
                       end
           endcase
         end
diff --git a/wally-pipelined/src/wallypipelinedhart.sv b/wally-pipelined/src/wallypipelinedhart.sv
index db0debf8..1bcae36f 100644
--- a/wally-pipelined/src/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wallypipelinedhart.sv
@@ -45,12 +45,14 @@ module wallypipelinedhart (
   // new signals that must connect through DP
   logic        CSRWriteM, PrivilegedM;
   logic [`XLEN-1:0] SrcAM;
-  logic [31:0] InstrM;
-  logic [`XLEN-1:0] PCM;
+  logic [31:0] InstrD, InstrM;
+  logic [`XLEN-1:0] PCE, PCM, PCW;
+  logic [`XLEN-1:0] PCTargetE;
   logic [`XLEN-1:0] CSRReadValM;
   logic [`XLEN-1:0] PrivilegedNextPCM;
   logic InstrValidW;
-  logic InstrMisalignedFaultM, IllegalIEUInstrFaultD;
+  logic InstrMisalignedFaultM;
+  logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
   logic LoadMisalignedFaultM, LoadAccessFaultM;
   logic StoreMisalignedFaultM, StoreAccessFaultM;
   logic [`XLEN-1:0] InstrMisalignedAdrM;
@@ -67,41 +69,22 @@ module wallypipelinedhart (
   logic [2:0] FRM_REGW;
   logic       FloatRegWriteW;
            
+  ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache
+
   ieu ieu(.*); // inteber execution unit: integer register file, datapath and controller
-/*  ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache
+//  dcu dcu(.*); // data cache unit
+/*  
   mdu mdu(.*); // multiply and divide unit
   fpu fpu(.*); // floating point unit
-  dcu dcu(.*); // data cache unit
   ebu ebu(.*); // external bus to memory and peripherals */
-//  privileged pcu(.*); // privileged control unit CSRs, traps, privilege mode
   hazard     hzu(.*);	// global stall and flush control
 
   // Priveleged block operates in M and W stages, handling CSRs and exceptions
   privileged priv(.*);
 
-/*
-  input  logic clk, reset,
-  input  logic        CSRWriteM,
-  input  logic [`XLEN-1:0] SrcAM,
-  input  logic [31:0] InstrM,
-  input  logic [`XLEN-1:0] PCM,
-  output logic [`XLEN-1:0] CSRReadValM,
-  output logic [`XLEN-1:0] PrivilegedNextPCM,
-  output logic RetM, TrapM,
-  input  logic InstrValidW, FloatRegWriteW, LoadStallD,
-  input  logic PrivilegedM,
-  input  logic InstrMisalignedFaultM, InstrAccessFaultM, IllegalInstrFaultInM,
-  input  logic LoadMisalignedFaultM, LoadAccessFaultM,
-  input  logic StoreMisalignedFaultM, StoreAccessFaultM,
-  input  logic TimerIntM, ExtIntM, SwIntM,
-  input  logic [`XLEN-1:0] InstrMisalignedAdrM, ALUResultM,
-  input  logic [4:0]      SetFflagsM,
-  output logic [2:0]      FRM_REGW
-*/
-
   // add FPU here, with SetFflagsM, FRM_REGW
   // presently stub out SetFlagsM and FloatRegWriteW
   assign SetFflagsM = 0;
-  //assign FloatRegWriteW = 0;
+  assign FloatRegWriteW = 0;
              
 endmodule
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index ae591dd5..7e88ec0f 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -241,6 +241,7 @@ string tests32i[] = {
 
   // instantiate device to be tested
   assign GPIOPinsIn = 0;
+  assign UARTSin = 1;
   wallypipelined dut(
     clk, reset, WriteData, DataAdr, MemRW, 
     GPIOPinsIn, GPIOPinsOut, GPIOPinsEn, UARTSin, UARTSout
@@ -248,8 +249,8 @@ string tests32i[] = {
 
   // Track names of instructions
   instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
-                dut.hart.ieu.dp.InstrDecompD, dut.hart.ieu.dp.InstrE,
-                dut.hart.ieu.dp.InstrM,  InstrW,
+                dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
+                dut.hart.ifu.InstrM,  InstrW,
                 InstrDName, InstrEName, InstrMName, InstrWName);
 
   // initialize test