diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh
index 8e3ec34df..e56fb7a21 100644
--- a/pipelined/config/rv64gc/wally-config.vh
+++ b/pipelined/config/rv64gc/wally-config.vh
@@ -139,7 +139,7 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define BPRED_ENABLED 1
+`define BPRED_ENABLED 0
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
 `define BPRED_SIZE 10
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index 044ebdcc3..c21ab754b 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -63,7 +63,7 @@ module fdivsqrt(
   logic [`DIVb:0]   FirstU, FirstUM;
   logic [`DIVb+1:0] FirstC;
   logic Firstun;
-  logic WZeroM, AZeroM, BZeroM, AZeroE, BZeroE;
+  logic WZeroE, AZeroM, BZeroM, AZeroE, BZeroE;
   logic SpecialCaseM, MDUM;
   logic [`DIVBLEN:0] nE, nM, mM;
   logic CalcOTFCSwapE, OTFCSwapE, ALTBM, AsM;
@@ -80,15 +80,16 @@ module fdivsqrt(
     .FDivBusyE, .FDivStartE, .IDivStartE, .IFDivStartE, .FDivDoneE, .StallM, .FlushE, /*.DivDone, */ 
     .XZeroE, .YZeroE, .AZeroE, .BZeroE,
     .XNaNE, .YNaNE, .MDUE,
-    .XInfE, .YInfE, .WZeroM, .SpecialCaseM);
+    .XInfE, .YInfE, .WZeroE, .SpecialCaseM);
   fdivsqrtiter fdivsqrtiter(
     .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .MDUE, .SqrtE, // .SqrtM,
     .X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
     .IFDivStartE, .CalcOTFCSwapE, .OTFCSwapE,
     .FDivBusyE);
   fdivsqrtpostproc fdivsqrtpostproc(
-    .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .MDUM,
+    .clk, .reset, .StallM,
+    .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .MDUE, .Firstun, 
     .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAM,
     .nM, .ALTBM, .mM, .BZeroM, .AsM, .OTFCSwapEM(OTFCSwapE),
-    .QmM, .WZeroM, .DivSM, .FPIntDivResultM);
+    .QmM, .WZeroE, .DivSM, .FPIntDivResultM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv
new file mode 100644
index 000000000..fe95c50da
--- /dev/null
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv
@@ -0,0 +1,76 @@
+///////////////////////////////////////////
+// fdivsqrtexpcalc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Exponent calculation for the combined divide and square root floating point unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module fdivsqrtexpcalc(                    // combinational: derives the result exponent Qe for a divide or a square root
+  input  logic [`FMTBITS-1:0] Fmt,         // format select; picks which bias constant drives Bias
+  input  logic [`NE-1:0] Xe, Ye,           // operand exponents; Ye only enters the divide path, where it is subtracted
+  input  logic Sqrt,                       // 1: Qe = SExp (square root), 0: Qe = DExp (divide)
+  input  logic XZeroE,                     // when set, masks DExp to all zeros (SExp is not masked)
+  input  logic [`DIVBLEN:0] ell, m,        // ell is subtracted next to Xe, m is added next to Ye; presumably the normalization shift counts of X and Y -- TODO confirm in fdivsqrtpreproc
+  output logic [`NE+1:0] Qe                // selected exponent, two bits wider than Xe/Ye
+  );
+  logic [`NE-2:0] Bias;                    // bias constant chosen by Fmt (NE-1 bits)
+  logic [`NE+1:0] SXExp;                   // Xe - ell - BIAS, the value that gets halved for square root
+  logic [`NE+1:0] SExp;                    // square root exponent candidate
+  logic [`NE+1:0] DExp;                    // divide exponent candidate
+  // Bias selection: exactly one of the generate branches below is elaborated, chosen by FPSIZES
+  if (`FPSIZES == 1) begin                 // one bias constant; Fmt is not consulted
+      assign Bias = (`NE-1)'(`BIAS); 
+
+  end else if (`FPSIZES == 2) begin        // nonzero Fmt selects BIAS, zero selects BIAS1
+      assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+
+  end else if (`FPSIZES == 3) begin        // Fmt matched against FMT/FMT1/FMT2; any other encoding yields x
+      always_comb
+          case (Fmt)
+              `FMT: Bias  =  (`NE-1)'(`BIAS);
+              `FMT1: Bias = (`NE-1)'(`BIAS1);
+              `FMT2: Bias = (`NE-1)'(`BIAS2);
+              default: Bias = 'x;
+          endcase
+
+  end else if (`FPSIZES == 4) begin        // all four 2'h encodings are listed; there is no default arm
+    always_comb
+        case (Fmt)
+            2'h3: Bias =  (`NE-1)'(`Q_BIAS);
+            2'h1: Bias =  (`NE-1)'(`D_BIAS);
+            2'h0: Bias =  (`NE-1)'(`S_BIAS);
+            2'h2: Bias =  (`NE-1)'(`H_BIAS);
+        endcase
+  end
+  assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); // NOTE(review): subtracts BIAS, not the Fmt-selected Bias -- presumably Xe is already expressed with BIAS; confirm
+  assign SExp  = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; // sign-replicating shift right by one (halves SXExp), then adds Bias back
+  // divide exponent: Xe - ell - Ye + m + Bias; corrects the exponent for denormalized input's normalization shifts, then masks to 0 when XZeroE
+  assign DExp  = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZeroE}};
+  // Sqrt chooses which of the two candidate exponents is driven out
+  assign Qe = Sqrt ? SExp : DExp;
+endmodule
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index d287416ee..f63168290 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -43,7 +43,7 @@ module fdivsqrtfsm(
   input  logic SqrtE,
   input  logic StallM,
   input  logic FlushE,
-  input  logic WZeroM,
+  input  logic WZeroE,
   input  logic MDUE,
   input  logic [`DIVBLEN:0] nE,
   output logic IFDivStartE,
@@ -116,7 +116,8 @@ module fdivsqrtfsm(
           if (SpecialCaseE) state <= #1 DONE;
           else             state <= #1 BUSY;
       end else if (state == BUSY) begin 
-          if (step == 1 /*| WZeroM */)  state <= #1 DONE; // finished steps or terminate early on zero residual
+//          if (step == 1 | WZeroE)  state <= #1 DONE; // finished steps or terminate early on zero residual
+          if (step == 1)  state <= #1 DONE; // finished steps or terminate early on zero residual
           step <= step - 1;
       end else if (state == DONE) begin
         if (StallM) state <= #1 DONE;
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
index e963df4a7..8eaf98afa 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -31,16 +31,19 @@
 `include "wally-config.vh"
 
 module fdivsqrtpostproc(
+  input  logic              clk, reset,
+  input  logic              StallM,
   input  logic [`DIVb+3:0]  WS, WC,
   input  logic [`DIVb-1:0]  D, 
   input  logic [`DIVb:0]    FirstU, FirstUM, 
   input  logic [`DIVb+1:0]  FirstC,
+  input  logic              SqrtE, MDUE,
   input  logic              Firstun, SqrtM, SpecialCaseM, OTFCSwapEM,
 	input  logic [`XLEN-1:0]  ForwardedSrcAM,
-  input  logic              RemOpM, ALTBM, BZeroM, AsM, MDUM,
+  input  logic              RemOpM, ALTBM, BZeroM, AsM, 
   input  logic [`DIVBLEN:0] nM, mM,
   output logic [`DIVb:0]    QmM, 
-  output logic              WZeroM,
+  output logic              WZeroE,
   output logic              DivSM,
   output logic [`XLEN-1:0]  FPIntDivResultM
 );
@@ -48,37 +51,56 @@ module fdivsqrtpostproc(
   logic [`DIVb+3:0] W, Sum, DM;
   logic [`DIVb:0] PreQmM;
   logic NegStickyM, PostIncM;
-  logic weq0;
+  logic weq0E;
   logic [`DIVBLEN:0] NormShiftM;
   logic [`DIVb:0] IntQuotM, NormQuotM;
   logic [`DIVb+3:0] IntRemM, NormRemM;
   logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
+  logic WZeroM;
 
-  // check for early termination on an exact result.  If the result is not exact, the sticky should be set
-  aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0);
+  //////////////////////////
+  // Execute Stage: Detect early termination for an exact result
+  //////////////////////////
 
-  if (`RADIX == 2) begin
-    logic [`DIVb+3:0] FZero;
+  // check for early termination on an exact result. 
+  aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0E);
+
+  if (`RADIX == 2) begin: R2EarlyTerm
+    logic [`DIVb+3:0] FZeroE;
     logic [`DIVb+2:0] FirstK;
-    logic wfeq0;
+    logic wfeq0E;
     logic [`DIVb+3:0] WCF, WSF;
 
     assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
-    assign FZero = (SqrtM & ~MDUM) ? {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0} : {3'b001,D,1'b0};
-    csa #(`DIVb+4) fadd(WS, WC, FZero, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
-    aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0);
-    assign WZeroM = weq0|(wfeq0 & Firstun);
+    assign FZeroE = (SqrtE & ~MDUE) ? {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0} : {3'b001,D,1'b0};
+    csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
+    aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
+    assign WZeroE = weq0E|(wfeq0E & Firstun);
   end else begin
-    assign WZeroM = weq0;
+    assign WZeroE = weq0E;
   end 
+
+  //////////////////////////
+  // E/M Pipeline register
+  //////////////////////////
+ 
+  flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
+
+  //////////////////////////
+  // Memory Stage: Postprocessing
+  //////////////////////////
+
+  //  If the result is not exact, the sticky should be set
   assign DivSM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide
 
-  // Determine if sticky bit is negative
+  // Determine if sticky bit is negative  // *** look for ways to optimize this
   assign Sum = WC + WS;
   assign W = $signed(Sum) >>> `LOGR;
   assign NegStickyM = W[`DIVb+3];
   assign DM = {4'b0001, D};
 
+  // *** put conditionals on integer division hardware, move to its own module
+
   // Integer division: sign handling for div and rem
   always_comb 
     if (~AsM)
@@ -92,7 +114,8 @@ module fdivsqrtpostproc(
         PostIncM  = 0;
       end
     else 
-      if (NegStickyM | weq0) begin
+//      if (NegStickyM | weq0) begin // *** old code, replaced by the one below in the right stage and more comprehensive
+      if (NegStickyM | WZeroM) begin
         NormQuotM = FirstU;
         NormRemM  = W;
         PostIncM  = 0;
@@ -111,13 +134,14 @@ module fdivsqrtpostproc(
       IntQuotM = '0;
       IntRemM  = {{(`DIVb-`XLEN+4){1'b0}}, ForwardedSrcAM};
     end else if (WZeroM) begin
-      if (weq0) begin
+    // *** dh: 12/26: don't understand this logic and why weq0 inside WZero check.  Need a divide by 0 check here
+/*      if (weq0) begin */
         IntQuotM = FirstU;
         IntRemM  = '0;
-      end else begin
+/*      end else begin
         IntQuotM = FirstUM;
         IntRemM  = '0;
-      end
+      end */
     end else begin 
       IntQuotM = NormQuotM;
       IntRemM  = NormRemM;
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index c68cd25d4..6711441f5 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -114,6 +114,8 @@ module fdivsqrtpreproc (
   else              assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
   assign X = MDUE ? DivX >> RightShiftX : PreShiftX;
 
+  fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZeroE, .ell, .m(mE), .Qe(QeE));
+
   //           radix 2     radix 4
   // 1 copies  DIVLEN+2    DIVLEN+2/2
   // 2 copies  DIVLEN+2/2  DIVLEN+2/2*2
@@ -134,51 +136,7 @@ module fdivsqrtpreproc (
   flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
   flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
   flopen #(`XLEN)   srcareg(clk, IFDivStartE, ForwardedSrcAE, ForwardedSrcAM);
-  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZeroE, .ell, .m(mE), .Qe(QeE));
+
 
 endmodule
 
-module expcalc(
-  input  logic [`FMTBITS-1:0] Fmt,
-  input  logic [`NE-1:0] Xe, Ye,
-  input  logic Sqrt,
-  input  logic XZeroE, 
-  input  logic [`DIVBLEN:0] ell, m,
-  output logic [`NE+1:0] Qe
-  );
-  logic [`NE-2:0] Bias;
-  logic [`NE+1:0] SXExp;
-  logic [`NE+1:0] SExp;
-  logic [`NE+1:0] DExp;
-  
-  if (`FPSIZES == 1) begin
-      assign Bias = (`NE-1)'(`BIAS); 
-
-  end else if (`FPSIZES == 2) begin
-      assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
-
-  end else if (`FPSIZES == 3) begin
-      always_comb
-          case (Fmt)
-              `FMT: Bias  =  (`NE-1)'(`BIAS);
-              `FMT1: Bias = (`NE-1)'(`BIAS1);
-              `FMT2: Bias = (`NE-1)'(`BIAS2);
-              default: Bias = 'x;
-          endcase
-
-  end else if (`FPSIZES == 4) begin        
-    always_comb
-        case (Fmt)
-            2'h3: Bias =  (`NE-1)'(`Q_BIAS);
-            2'h1: Bias =  (`NE-1)'(`D_BIAS);
-            2'h0: Bias =  (`NE-1)'(`S_BIAS);
-            2'h2: Bias =  (`NE-1)'(`H_BIAS);
-        endcase
-  end
-  assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
-  assign SExp  = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
-  // correct exponent for denormalized input's normalization shifts
-  assign DExp  = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZeroE}};
-  
-  assign Qe = Sqrt ? SExp : DExp;
-endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index 3da4523ac..df9857c0a 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -1191,8 +1191,8 @@ string imperas32f[] = '{
     "rv64i_m/F/src/fsub_b4-01.S",
     "rv64i_m/F/src/fsub_b5-01.S",
     "rv64i_m/F/src/fsub_b7-01.S",
-    "rv64i_m/F/src/fsub_b8-01.S"
-    // "rv64i_m/F/src/fsw-align-01.S"
+    "rv64i_m/F/src/fsub_b8-01.S",
+    "rv64i_m/F/src/fsw-align-01.S"
     };
 
 
@@ -1279,8 +1279,8 @@ string imperas32f[] = '{
     "rv64i_m/D/src/fle.d_b19-01.S",
     "rv64i_m/D/src/flt.d_b1-01.S",
     "rv64i_m/D/src/flt.d_b19-01.S",
-    "rv64i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back
-    "rv64i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266
+    "rv64i_m/D/src/fld-align-01.S", 
+    "rv64i_m/D/src/fsd-align-01.S", 
     "rv64i_m/D/src/fmadd.d_b14-01.S",
     "rv64i_m/D/src/fmadd.d_b16-01.S",
     "rv64i_m/D/src/fmadd.d_b17-01.S",
@@ -1551,8 +1551,8 @@ string imperas32f[] = '{
     "rv32i_m/F/src/fsub_b4-01.S",
     "rv32i_m/F/src/fsub_b5-01.S",
     "rv32i_m/F/src/fsub_b7-01.S",
-    "rv32i_m/F/src/fsub_b8-01.S"
-    // "rv32i_m/F/src/fsw-align-01.S"
+    "rv32i_m/F/src/fsub_b8-01.S",
+    "rv32i_m/F/src/fsw-align-01.S"
     };
 
   string arch32d[] = '{
@@ -1618,8 +1618,8 @@ string imperas32f[] = '{
     "rv32i_m/D/src/fle.d_b19-01.S",
     "rv32i_m/D/src/flt.d_b1-01.S",
     "rv32i_m/D/src/flt.d_b19-01.S",
-    "rv32i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back
-    "rv32i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266
+    "rv32i_m/D/src/fld-align-01.S", 
+    "rv32i_m/D/src/fsd-align-01.S", 
     "rv32i_m/D/src/fmadd.d_b14-01.S",
     "rv32i_m/D/src/fmadd.d_b16-01.S",
     "rv32i_m/D/src/fmadd.d_b17-01.S",