diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv
index eecb1f983..bad52a948 100644
--- a/wally-pipelined/src/ifu/ifu.sv
+++ b/wally-pipelined/src/ifu/ifu.sv
@@ -47,6 +47,8 @@ module ifu (
   input logic [`XLEN-1:0]  PrivilegedNextPCM, 
   output logic [31:0] 	   InstrD, InstrM,
   output logic [`XLEN-1:0] PCM, 
+  output logic [3:0] InstrClassM,
+  output logic BPPredWrongM,
   // Writeback
   // output logic [`XLEN-1:0] PCLinkW,
   // Faults
@@ -206,6 +208,20 @@ module ifu (
 			       .d(InstrClassD),
 			       .q(InstrClassE));
 
+  flopenrc #(4) InstrClassRegM(.clk(clk),   // carries InstrClassE to InstrClassM (4 bits wide)
+			       .reset(reset),
+			       .en(~StallM),   // enable input is the inverse of StallM
+			       .clear(FlushM), // clear input is driven by FlushM
+			       .d(InstrClassE),
+			       .q(InstrClassM)); // drives the InstrClassM output port of ifu
+
+  flopenrc #(1) BPPredWrongRegM(.clk(clk),  // carries BPPredWrongE to BPPredWrongM (1 bit wide)
+			       .reset(reset),
+			       .en(~StallM),   // NOTE(review): presumably holds its value while StallM is high; confirm consumers do not count a held 1 more than once
+			       .clear(FlushM), // clear input is driven by FlushM
+			       .d(BPPredWrongE),
+			       .q(BPPredWrongM)); // drives the BPPredWrongM output port of ifu
+
   // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.  
   // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
   // have dedicated adder in Mem stage based on PCM + 2 or 4
diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv
index 118922867..0c38b6883 100644
--- a/wally-pipelined/src/privileged/csr.sv
+++ b/wally-pipelined/src/privileged/csr.sv
@@ -33,7 +33,8 @@ module csr (
   input  logic [`XLEN-1:0] PCM, SrcAM,
   input  logic             CSRReadM, CSRWriteM, TrapM, MTrapM, STrapM, UTrapM, mretM, sretM, uretM,
   input  logic             TimerIntM, ExtIntM, SwIntM,
-  input  logic             InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongE,
+  input  logic             InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
+  input  logic [3:0]       InstrClassM,
   input  logic [1:0]       NextPrivilegeModeM, PrivilegeModeW,
   input  logic [`XLEN-1:0] CauseM, NextFaultMtvalM,
   output logic [1:0]       STATUS_MPP,
diff --git a/wally-pipelined/src/privileged/csrc.sv b/wally-pipelined/src/privileged/csrc.sv
index 57bac3c24..dbf21e915 100644
--- a/wally-pipelined/src/privileged/csrc.sv
+++ b/wally-pipelined/src/privileged/csrc.sv
@@ -29,7 +29,8 @@
 
 module csrc ( 
     input  logic             clk, reset,
-    input  logic             InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongE,
+    input  logic             InstrValidW, LoadStallD, CSRMWriteM, BPPredWrongM,
+    input  logic [3:0]      InstrClassM,
     input  logic [11:0]      CSRAdrM,
     input  logic [1:0]       PrivilegeModeW,
     input  logic [`XLEN-1:0] CSRWriteValM,
@@ -62,8 +63,9 @@ module csrc (
     assign MCOUNTEN[1] = 1'b0;
     assign MCOUNTEN[2] = InstrValidW;
     assign MCOUNTEN[3] = LoadStallD;
-    assign MCOUNTEN[4] = BPPredWrongE;
-    assign MCOUNTEN[`COUNTERS:5] = 0; 
+    assign MCOUNTEN[4] = BPPredWrongM;  // bit 4 follows the BPPredWrongM input
+    assign MCOUNTEN[5] = InstrClassM[0];  // bit 5 follows bit 0 of InstrClassM; NOTE(review): the class encoded by bit 0 is not visible here -- confirm
+    assign MCOUNTEN[`COUNTERS:6] = 0; // remaining bits from 6 up to `COUNTERS are tied to 0
 
     genvar j;       
     generate
diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv
index a2818b6a4..8a6854e98 100644
--- a/wally-pipelined/src/privileged/privileged.sv
+++ b/wally-pipelined/src/privileged/privileged.sv
@@ -36,7 +36,8 @@ module privileged (
   output logic [`XLEN-1:0] CSRReadValW,
   output logic [`XLEN-1:0] PrivilegedNextPCM,
   output logic             RetM, TrapM,
-  input  logic             InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongE,
+  input  logic             InstrValidW, FloatRegWriteW, LoadStallD, BPPredWrongM,
+  input  logic [3:0]       InstrClassM,
   input  logic             PrivilegedM,
   input  logic             InstrMisalignedFaultM, InstrAccessFaultF, IllegalIEUInstrFaultD,
   input  logic             LoadMisalignedFaultM, LoadAccessFaultM,
diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv
index 29e9d3428..5975015ff 100644
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@@ -110,7 +110,9 @@ module wallypipelinedhart (
   logic             InstrReadF;
   logic             DataStall, InstrStall;
   logic             InstrAckD, MemAckW;
-  logic             BPPredWrongE;
+
+  logic             BPPredWrongE, BPPredWrongM;
+  logic [3:0]       InstrClassM;
   
            
   ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache