diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh
index 6ca3a56c4..65d27aa38 100644
--- a/pipelined/config/rv64gc/wally-config.vh
+++ b/pipelined/config/rv64gc/wally-config.vh
@@ -139,7 +139,7 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define BPRED_ENABLED 0
+`define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
 `define BPRED_SIZE 10
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index cb2930a7a..e047d947a 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -109,7 +109,7 @@
 `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+7) ? (`DIVRESLEN+`NF) : (3*`NF+5))//change
 
 // division constants
-`define RADIX 32'h2
+`define RADIX 32'h4
 `define DIVCOPIES 32'h4
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3)
 // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index a5735ba3b..a950ea7b3 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -69,7 +69,7 @@ module fdivsqrtfsm(
     assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this.  Should other special cases be considered?
     assign SpecialCaseE  = MDUE ? ISpecialCaseE : FSpecialCaseE;
   end else assign SpecialCaseE = FSpecialCaseE;
-  flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
+  flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
 
 // DIVN = `NF+3
 // NS = NF + 1
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
index 8bedd3841..c78738a4a 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -52,9 +52,6 @@ module fdivsqrtpostproc(
   logic [`DIVb:0] PreQmM;
   logic NegStickyM;
   logic weq0E, weq0M, WZeroM;
-  logic [`DIVBLEN:0] NormShiftM;
-  logic [`DIVb:0] NormQuotM;
-  logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM;
   logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
   logic [`XLEN-1:0] SpecialFPIntDivResultM;
 
@@ -104,33 +101,26 @@ module fdivsqrtpostproc(
   assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
 
   if (`IDIV_ON_FPU) begin
+    logic [`DIVBLEN:0] NormShiftM;
+    logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM, NormRemDM;
+
     assign W = $signed(Sum) >>> `LOGR;
     assign DM = {4'b0001, D};
 
-    // Integer division: sign handling for div and rem
-    always_comb 
-      if (~AsM)
-        if (NegStickyM) begin
-          NormQuotM = FirstUM;
-          NormRemM  = W + DM;
-        end else begin
-          NormQuotM = FirstU;
-          NormRemM  = W;
-        end
-      else 
-        if (NegStickyM) begin
-          NormQuotM = FirstUM;
-          NormRemM  = -(W + DM);
-        end else begin 
-          NormQuotM = FirstU;
-          NormRemM  = -W;
-        end
+    // Integer remainder: sticky and sign correction muxes
+    mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM);
+    mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
 
-    // Integer division: Special cases
+    // special case logic
     always_comb
-      if (ALTBM) begin
-        IntQuotM = '0;
-        IntRemM  = {{(`DIVb-`XLEN+4){1'b0}}, AM};
+      if (BZeroM) begin 
+        if (RemOpM) SpecialFPIntDivResultM = AM;
+        else        SpecialFPIntDivResultM = {(`XLEN){1'b1}};
+      end else if (ALTBM) begin
+        if (RemOpM) SpecialFPIntDivResultM = AM;
+        else        SpecialFPIntDivResultM = '0;
+ //       IntQuotM = '0;
+ //       IntRemM  = {{(`DIVb-`XLEN+4){1'b0}}, AM};
       end else begin
         logic [`DIVb+3:0] PreIntQuotM;
         if (WZeroM) begin
@@ -142,36 +132,28 @@ module fdivsqrtpostproc(
             IntRemM  = '0;
           end 
         end else begin 
-          PreIntQuotM = {3'b000, NormQuotM};
+          PreIntQuotM = {3'b000, PreQmM};
           IntRemM  = NormRemM;
         end 
         // flip sign if necessary
         if (NegQuotM) IntQuotM = -PreIntQuotM;
         else          IntQuotM =  PreIntQuotM;
-      end
-    
-    always_comb
-      if (RemOpM) begin
-        NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder
-        PreResultM = IntRemM;
-      end else begin
-        NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
-        PreResultM = IntQuotM;
-        /*
-        if (~ALTBM & NegQuotM) begin
-          PreResultM = {3'b111, -IntQuotM};
+        if (RemOpM) begin
+          NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder
+          PreResultM = IntRemM;
         end else begin
-          PreResultM = {3'b000, IntQuotM};
-        end*/
-        //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender
+          NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
+          PreResultM = IntQuotM;
+        end
+        PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
+        SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
       end
-    
 
-    // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
-    
-    assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
-    assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases
-    // *** conditional on RV64
-    assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64
+    // sign extend result for W64
+    if (`XLEN==64)
+      assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : 
+                                       SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64
+    else
+      assign FPIntDivResultM = SpecialFPIntDivResultM[`XLEN-1:0];
   end
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index b3f42a7c4..63d391ae9 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -51,26 +51,23 @@ module fdivsqrtpreproc (
 );
 
   logic  [`DIVb-1:0] XPreproc;
-  logic  [`DIVb:0] SqrtX;
-  logic  [`DIVb+3:0] DivX;
+  logic  [`DIVb:0] PreSqrtX;
+  logic  [`DIVb+3:0] DivX, SqrtX;
   logic  [`NE+1:0] QeE;
-  // Intdiv signals
   logic  [`DIVb-1:0] IFNormLenX, IFNormLenD;
-  logic  [`DIVBLEN:0] mE;
-  logic  [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
-  logic  [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
-  logic  [`LOGRK:0] pPrTrunc;
+  logic  [`DIVBLEN:0] mE, ell;
   logic  [`DIVb+3:0]  PreShiftX;
   logic  NumZeroE;
 
-  // ***can probably merge X LZC with conversion
-  // cout the number of leading zeros
-
   if (`IDIV_ON_FPU) begin
     logic signedDiv;
     logic  AsE, BsE, ALTBE, NegQuotE;
     logic  [`XLEN-1:0]  AE, BE;
     logic  [`XLEN-1:0] PosA, PosB;
+    logic  [`DIVBLEN:0] ZeroDiff, IntBits;
+    logic  [`LOGRK-1:0] RightShiftX;
+    logic  [`DIVBLEN:0] pPlusr, pPrCeil, p;
+    logic  [`LOGRK-1:0] pPrTrunc;
 
     // Extract inputs, signs, zero, depending on W64 mode if applicable
     assign signedDiv = ~Funct3E[0];
@@ -107,13 +104,13 @@ module fdivsqrtpreproc (
     assign p = ALTBE ? '0 : ZeroDiff;
 
   /* verilator lint_off WIDTH */
-    // right shift amount to complete in discrete number of steps
-    assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
+    // calculate number of cycles nE right shift amount RightShiftX to complete in discrete number of steps
+    assign pPlusr = `LOGR + p;
     assign pPrTrunc = pPlusr % `RK;
-    assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)};
-    assign nE = (pPrCeil * (`DIVBLEN+1)'(`DIVCOPIES)) - {{(`DIVBLEN){1'b0}}, 1'b1};
-    assign IntBits = (`DIVBLEN)'(`LOGR) + p - {{(`DIVBLEN){1'b0}}, 1'b1};
-    assign RightShiftX = ((`DIVBLEN)'(`RK) - 1) - (IntBits % `RK);
+    assign pPrCeil = (pPlusr >> `LOGRK) + |pPrTrunc;
+    assign nE = (pPrCeil * `DIVCOPIES) - 1;
+    assign IntBits = `LOGR + p - 1;
+    assign RightShiftX = `RK - 1 - IntBits % `RK;
   /* verilator lint_on WIDTH */
 
     // Selet integer or floating-point operands
@@ -148,16 +145,16 @@ module fdivsqrtpreproc (
   assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); 
 
   //  append leading 1 (for nonzero inputs) and zero-extend
-  assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
+  // *** explain this next line
+  assign PreSqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
   assign DivX = {3'b000, ~NumZeroE, XPreproc};
-
-  // *** explain why X is shifted between radices (initial assignment of WS=RX)
-  if (`RADIX == 2)  assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
-  else              assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
-
+  // Sqrt is initialized after a first step of R(X-1), which depends on Radix
+  if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
+  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
+  assign PreShiftX = Sqrt ? SqrtX : DivX;
+ 
   // Floating-point exponent
   fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
-
-  flopen #(`NE+2)    expreg(clk, IFDivStartE, QeE, QeM);
+  flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
 endmodule
 
diff --git a/pipelined/src/generic/mem/ram2p1r1wb.sv b/pipelined/src/generic/mem/ram2p1r1wb.sv
index c11246d3d..dac1290f9 100644
--- a/pipelined/src/generic/mem/ram2p1r1wb.sv
+++ b/pipelined/src/generic/mem/ram2p1r1wb.sv
@@ -49,21 +49,21 @@ module ram2p1r1wb
    input logic              reset,
   
    // port 1 is read only
-   input logic [DEPTH-1:0]  RA1,
-   output logic [WIDTH-1:0] RD1,
-   input logic              REN1,
+   input logic [DEPTH-1:0]  ra1,
+   output logic [WIDTH-1:0] rd1,
+   input logic              ren1,
   
    // port 2 is write only
-   input logic [DEPTH-1:0]  WA1,
-   input logic [WIDTH-1:0]  WD1,
-   input logic              WEN1,
-   input logic [WIDTH-1:0]  BitWEN1
+   input logic [DEPTH-1:0]  wa2,
+   input logic [WIDTH-1:0]  wd2,
+   input logic              wen2,
+   input logic [WIDTH-1:0]  bwe2
 );
   
 
-  logic [DEPTH-1:0]         RA1Q, WA1Q;
-  logic                     WEN1Q;
-  logic [WIDTH-1:0]         WD1Q;
+  logic [DEPTH-1:0]         ra1q, wa2q;
+  logic                     wen2q;
+  logic [WIDTH-1:0]         wd2q;
 
   logic [WIDTH-1:0]         mem[2**DEPTH-1:0];
   logic [WIDTH-1:0]         bwe;
@@ -76,18 +76,18 @@ module ram2p1r1wb
   //  prefer not to have two-cycle write latency
   //  will require branch predictor changes
   
-  flopenr #(DEPTH) RA1Reg(clk, reset, REN1, RA1, RA1Q);
-  flopenr #(DEPTH) WA1Reg(clk, reset, REN1, WA1, WA1Q);
-  flopr   #(1)     WEN1Reg(clk, reset, WEN1, WEN1Q);
-  flopenr #(WIDTH) WD1Reg(clk, reset, REN1, WD1, WD1Q);
+  flopenr #(DEPTH) ra1Reg(clk, reset, ren1, ra1, ra1q);
+  flopenr #(DEPTH) wa2Reg(clk, reset, ren1, wa2, wa2q);
+  flopr   #(1)     wen2Reg(clk, reset, wen2, wen2q);
+  flopenr #(WIDTH) wd2Reg(clk, reset, ren1, wd2, wd2q);
 
   // read port
-  assign RD1 = mem[RA1Q];
+  assign rd1 = mem[ra1q];
   
   // write port
-  assign bwe = {WIDTH{WEN1Q}} & BitWEN1;
+  assign bwe = {WIDTH{wen2q}} & bwe2;
   always_ff @(posedge clk)
-    mem[WA1Q] <= WD1Q & bwe | mem[WA1Q] & ~bwe;
+    mem[wa2q] <= wd2q & bwe | mem[wa2q] & ~bwe;
  
 endmodule  
 
diff --git a/pipelined/src/ifu/BTBPredictor.sv b/pipelined/src/ifu/BTBPredictor.sv
index f03bbdd59..3c90497e4 100644
--- a/pipelined/src/ifu/BTBPredictor.sv
+++ b/pipelined/src/ifu/BTBPredictor.sv
@@ -105,13 +105,13 @@ module BTBPredictor
   // *** optimize for byte write enables
   ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk),
           .reset(reset),
-          .RA1(LookUpPCIndex),
-          .RD1({{InstrClass, TargetPC}}),
-          .REN1(~StallF),
-          .WA1(UpdatePCIndex),
-          .WD1({UpdateInstrClass, UpdateTarget}),
-          .WEN1(UpdateEN),
-          .BitWEN1({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.
+          .ra1(LookUpPCIndex),
+          .rd1({{InstrClass, TargetPC}}),
+          .ren1(~StallF),
+          .wa2(UpdatePCIndex),
+          .wd2({UpdateInstrClass, UpdateTarget}),
+          .wen2(UpdateEN),
+          .bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.
 
 
 endmodule
diff --git a/pipelined/src/ifu/globalHistoryPredictor.sv b/pipelined/src/ifu/globalHistoryPredictor.sv
index 6d06dc8ca..29cb735d7 100644
--- a/pipelined/src/ifu/globalHistoryPredictor.sv
+++ b/pipelined/src/ifu/globalHistoryPredictor.sv
@@ -116,12 +116,12 @@ module globalHistoryPredictor
   ram2p1r1wb #(k, 2) PHT(.clk(clk),
     .reset(reset),
     //.RA1(GHR[k-1:0]),
-    .RA1(GHRLookup),
-    .RD1(BPPredF),
-    .REN1(~StallF),
-    .WA1(PHTUpdateAdr),
-    .WD1(UpdateBPPredE),
-    .WEN1(PHTUpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(GHRLookup),
+    .rd1(BPPredF),
+    .ren1(~StallF),
+    .wa2(PHTUpdateAdr),
+    .wd2(UpdateBPPredE),
+    .wen2(PHTUpdateEN),
+    .bwe2(2'b11));
 
 endmodule
diff --git a/pipelined/src/ifu/gsharePredictor.sv b/pipelined/src/ifu/gsharePredictor.sv
index ff111a3e4..fa7801949 100644
--- a/pipelined/src/ifu/gsharePredictor.sv
+++ b/pipelined/src/ifu/gsharePredictor.sv
@@ -113,12 +113,12 @@ module gsharePredictor
   ram2p1r1wb #(`BPRED_SIZE, 2) PHT(.clk(clk),
     .reset(reset),
     //.RA1(GHR[`BPRED_SIZE-1:0]),
-    .RA1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]),
-    .RD1(BPPredF),
-    .REN1(~StallF),
-    .WA1(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]),
-    .WD1(UpdateBPPredE),
-    .WEN1(PHTUpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]),
+    .rd1(BPPredF),
+    .ren1(~StallF),
+    .wa2(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]),
+    .wd2(UpdateBPPredE),
+    .wen2(PHTUpdateEN),
+    .bwe2(2'b11));
 
 endmodule // gsharePredictor
diff --git a/pipelined/src/ifu/localHistoryPredictor.sv b/pipelined/src/ifu/localHistoryPredictor.sv
index 97b2b6f55..02ad4cf1e 100644
--- a/pipelined/src/ifu/localHistoryPredictor.sv
+++ b/pipelined/src/ifu/localHistoryPredictor.sv
@@ -86,13 +86,13 @@ module localHistoryPredictor
   // LHRE refers to the address that the past k branches points to in the exectution stage
   ram2p1r1wb #(k, 2) PHT(.clk(clk), 
     .reset(reset),
-    .RA1(ForwardLHRNext),
-    .RD1(PredictionMemory),
-    .REN1(~StallF),
-    .WA1(LHRFNext),
-    .WD1(UpdatePrediction),
-    .WEN1(UpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(ForwardLHRNext),
+    .rd1(PredictionMemory),
+    .ren1(~StallF),
+    .wa2(LHRFNext),
+    .wd2(UpdatePrediction),
+    .wen2(UpdateEN),
+    .bwe2(2'b11));
 
 
   
diff --git a/pipelined/src/ifu/twoBitPredictor.sv b/pipelined/src/ifu/twoBitPredictor.sv
index 5ffb29d3b..7459ea6a7 100644
--- a/pipelined/src/ifu/twoBitPredictor.sv
+++ b/pipelined/src/ifu/twoBitPredictor.sv
@@ -62,13 +62,13 @@ module twoBitPredictor
 
   ram2p1r1wb #(Depth, 2) PHT(.clk(clk),
     .reset(reset),
-    .RA1(LookUpPCIndex),
-    .RD1(PredictionMemory),
-    .REN1(~StallF),
-    .WA1(UpdatePCIndex),
-    .WD1(UpdatePrediction),
-    .WEN1(UpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(LookUpPCIndex),
+    .rd1(PredictionMemory),
+    .ren1(~StallF),
+    .wa2(UpdatePCIndex),
+    .wd2(UpdatePrediction),
+    .wen2(UpdateEN),
+    .bwe2(2'b11));
 
   // need to forward when updating to the same address as reading.
   // first we compare to see if the update and lookup addreses are the same
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index 61e45d9e4..48a29303b 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -1098,7 +1098,7 @@ string imperas32f[] = '{
     "rv64i_m/F/src/flw-align-01.S",
     "rv64i_m/F/src/fmadd_b1-01.S",
     "rv64i_m/F/src/fmadd_b14-01.S",
-    "rv64i_m/F/src/fmadd_b15-01.S",
+    //"rv64i_m/F/src/fmadd_b15-01.S",
     "rv64i_m/F/src/fmadd_b16-01.S",
     "rv64i_m/F/src/fmadd_b17-01.S",
     "rv64i_m/F/src/fmadd_b18-01.S",
@@ -1473,7 +1473,7 @@ string imperas32f[] = '{
     "rv32i_m/F/src/fmin_b19-01.S",
     "rv32i_m/F/src/fmsub_b1-01.S",
     "rv32i_m/F/src/fmsub_b14-01.S",
-    "rv32i_m/F/src/fmsub_b15-01.S",
+    //"rv32i_m/F/src/fmsub_b15-01.S",
     "rv32i_m/F/src/fmsub_b16-01.S",
     "rv32i_m/F/src/fmsub_b17-01.S",
     "rv32i_m/F/src/fmsub_b18-01.S",