diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do
index 861657308..76e3d8668 100644
--- a/wally-pipelined/regression/wally-pipelined.do
+++ b/wally-pipelined/regression/wally-pipelined.do
@@ -43,7 +43,7 @@ view wave
 do ./wave-dos/peripheral-waves.do
 
 -- Run the Simulation 
-#run 5000 
+#run 3600 
 run -all
 #quit
 noview ../testbench/testbench-imperas.sv
diff --git a/wally-pipelined/src/generic/abs.sv b/wally-pipelined/src/generic/abs.sv
new file mode 100644
index 000000000..7ddbd38b6
--- /dev/null
+++ b/wally-pipelined/src/generic/abs.sv
@@ -0,0 +1,38 @@
+///////////////////////////////////////////
+// abs.sv
+//
+// Written: David_Harris@hmc.edu 28 September 2021
+// Modified: 
+//
+// Purpose: Absolute value (passes a through, or its 2's complement when the sign bit is set)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module abs #(parameter WIDTH = 8) (
+  input  logic [WIDTH-1:0] a,
+  output logic [WIDTH-1:0] y);
+  // Absolute value of a WIDTH-bit operand whose MSB, a[WIDTH-1], is treated as the sign bit.
+  logic [WIDTH-1:0] minusa;
+  // minusa is driven by the neg instance below (2's complement negation of a)
+  // select -a if sign bit of a is 1 (presumably mux2 ports are (d0, d1, select, out); mux2 is defined elsewhere -- confirm)
+  neg #(WIDTH) neg(a, minusa);
+  mux2 #(WIDTH) absmux(a, minusa, a[WIDTH-1], y); 
+endmodule
+
diff --git a/wally-pipelined/src/generic/neg.sv b/wally-pipelined/src/generic/neg.sv
new file mode 100644
index 000000000..a162a5c92
--- /dev/null
+++ b/wally-pipelined/src/generic/neg.sv
@@ -0,0 +1,34 @@
+///////////////////////////////////////////
+// neg.sv
+//
+// Written: David_Harris@hmc.edu 28 September 2021
+// Modified: 
+//
+// Purpose: 2's complement negator
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module neg #(parameter WIDTH = 8) (
+  input  logic [WIDTH-1:0] a,   // value to negate
+  output logic [WIDTH-1:0] y);  // 2's complement of a, WIDTH bits wide
+  // unary minus on a WIDTH-bit vector yields the same bits as inverting and adding one
+  assign y = -a;
+endmodule
+
diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv
index e7b3ff247..47a649f85 100644
--- a/wally-pipelined/src/ieu/forward.sv
+++ b/wally-pipelined/src/ieu/forward.sv
@@ -33,6 +33,7 @@ module forward(
   input logic        DivDoneE, DivBusyE,
   input logic	       FWriteIntE, FWriteIntM, FWriteIntW,
   input logic        SCE,
+  input logic        StallD,
   // Forwarding controls
   output logic [1:0] ForwardAE, ForwardBE,
   output logic       FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD
@@ -53,7 +54,7 @@ module forward(
   // Stall on dependent operations that finish in Mem Stage and can't bypass in time
    assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); 
    assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE));  
-   assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide
+   assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) /*| DivBusyE */; // *** extend with stalls for divide
    assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));
 
 endmodule
diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv
index 9571ba721..e6118cd40 100644
--- a/wally-pipelined/src/muldiv/intdiv_restoring.sv
+++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv
@@ -35,32 +35,52 @@ module intdiv_restoring (
   output logic [`XLEN-1:0] Q, REM
  );
 
-  logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift;
-  logic qi; // curent quotient bit
+  logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift, Dsaved, Din, Dabs, D2, Xabs, Xinit;
+  logic qi, qib; // curent quotient bit
   localparam STEPBITS = $clog2(`XLEN);
   logic [STEPBITS:0] step;
   logic div0;
 
+  // Setup for signed division
+  abs #(`XLEN) absd(D, Dabs);
+  mux2 #(`XLEN) dabsmux(D, Dabs, signedDivide, D2);
+  flopen #(`XLEN) dsavereg(clk, start, D2, Dsaved);
+  mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); // *** change start to init (could be delayed one from start)
+
+  abs #(`XLEN) absx(X, Xabs);
+  mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide, Xinit);
+  
   // restoring division
   mux2 #(`XLEN) wmux(W, 0, start, Win);
-  mux2 #(`XLEN) xmux(0, X, start, XQin);
+  mux2 #(`XLEN) xmux(XQ, Xinit, start, XQin);
   assign {Wshift, XQshift} = {Win[`XLEN-2:0], XQin, qi};
-  assign {qi, Wprime} = Wshift - D; // subtractor, carry out determines quotient bit
+  assign {qib, Wprime} = {1'b0, Wshift} + ~{1'b0, Din} + 1; // subtractor, carry out determines quotient bit
+  assign qi = ~qib;
   mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, Wnext);
-  flopen #(`XLEN) wreg(clk, busy, Wnext, W);
-  flopen #(`XLEN) xreg(clk, busy, XQshift, XQ);
+  flopen #(`XLEN) wreg(clk, start | busy, Wnext, W);
+  flopen #(`XLEN) xreg(clk, start | busy, XQshift, XQ);
+
+  // save D, which comes from SrcBE forwarding mux and could change because register file read is stalled during divide
+ // flopen #(`XLEN) dreg(clk, start, D, Dsaved);
+  //mux2 #(`XLEN) dmux(Dsaved, D, start, Din);
 
   // outputs
   // *** sign extension, handling W instructions
-  assign div0 = (D == 0);
+  assign div0 = (Din == 0);
   mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero
   mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from X when dividing by zero
  
+ 
   // busy logic
-  always_ff @(posedge clk)
-    if (start) begin
-        busy = 1; done = 0; step = 0;
-    end else if (busy) begin
+  always_ff @(posedge clk) 
+    if (reset) begin
+        busy = 0; done = 0; step = 0;
+    end else if (start) begin
+        if (div0) done = 1;
+        else begin
+            busy = 1; done = 0; step = 1;
+        end
+    end else if (busy & ~done) begin
         step = step + 1;
         if (step[STEPBITS] | div0) begin // *** early terminate on division by 0
             step = 0;
@@ -69,7 +89,10 @@ module intdiv_restoring (
         end
     end else if (done) begin
         done = 0;
+        busy = 0;
     end
+ 
+    
 
 endmodule // muldiv
 
diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv
index 714f7ebe7..75ac11f3d 100644
--- a/wally-pipelined/src/muldiv/muldiv.sv
+++ b/wally-pipelined/src/muldiv/muldiv.sv
@@ -50,14 +50,13 @@ module muldiv (
 	 logic [`XLEN*2-1:0] ProdE; 
 
 	 logic 		     enable_q;	 
-	 logic [2:0] 	     Funct3E_Q;
+	 //logic [2:0] 	     Funct3E_Q;
 	 logic 		     div0error; // ***unused
-	 logic [`XLEN-1:0]   N, D;
-	 logic [`XLEN-1:0]   Num0, Den0;	 
+	 logic [`XLEN-1:0]   X, D;
+	 //logic [`XLEN-1:0]   Num0, Den0;	 
 
 	 logic 		     gclk;
-	 logic 		     DivStartE;
-	 logic 		     startDivideE;
+	 logic 		     startDivideE, busy;
 	 logic 		     signedDivide;	 
 	 
 	 // Multiplier
@@ -72,37 +71,21 @@ module muldiv (
 
 	 // Handle sign extension for W-type instructions
 	 if (`XLEN == 64) begin // RV64 has W-type instructions
-            assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE;
-            assign Den0 = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE;
+            assign X = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE;
+            assign D = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE;
 	 end else begin // RV32 has no W-type instructions
-            assign Num0 = SrcAE;
-            assign Den0 = SrcBE;	    
+            assign X = SrcAE;
+            assign D = SrcBE;	    
 	 end	    
 
-	 // capture the Numerator/Denominator	 
-	 flopenrc #(`XLEN) reg_num (.d(Num0), .q(N),
-				    .en(startDivideE), .clear(DivDoneE),
-				    .reset(reset),  .clk(~gclk));
-	 flopenrc #(`XLEN) reg_den (.d(Den0), .q(D),
-				    .en(startDivideE), .clear(DivDoneE),
-				    .reset(reset),  .clk(~gclk));
-	 
-	 assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]);	 
-	 intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide);
-	 //intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(N), .D(D), .busy(DivBusyE), .done(DivDoneE), .Q(QuotE), .REM(RemE));
+	 assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]);	 
+	 //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide);
+	 intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE));
 
-	 // Added for debugging of start signal for divide
-	 assign startDivideE = MulDivE&DivStartE&~DivBusyE;
-	 
-	 // capture the start control signals since they are not held constant.
-	 // *** appears to be unused
-	 flopenrc #(3) funct3ereg (.d(Funct3E),
-				   .q(Funct3E_Q),
-				   .en(DivStartE),
-				   .clear(DivDoneE),
-				   .reset(reset),
-				   .clk(clk));
-	 
+	 // Start a divide when a new division instruction is received and the divider isn't already busy or finishing
+	 assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE;
+	 assign DivBusyE = startDivideE | busy;
+	 	 
 	 // Select result
 	 always_comb
            case (Funct3E)	   
@@ -115,19 +98,6 @@ module muldiv (
              3'b110: PrelimResultE = RemE;
              3'b111: PrelimResultE = RemE;
            endcase // case (Funct3E)
-
-	 // Start Divide process.  This simplifies to DivStartE = Funct3E[2];
-	 always_comb
-           case (Funct3E)
-             3'b000: DivStartE = 1'b0;
-             3'b001: DivStartE = 1'b0;
-             3'b010: DivStartE = 1'b0;
-             3'b011: DivStartE = 1'b0;
-             3'b100: DivStartE = 1'b1;
-             3'b101: DivStartE = 1'b1;
-             3'b110: DivStartE = 1'b1;
-             3'b111: DivStartE = 1'b1;
-           endcase
 	 
 	 // Handle sign extension for W-type instructions
 	 if (`XLEN == 64) begin // RV64 has W-type instructions
@@ -136,7 +106,7 @@ module muldiv (
             assign MulDivResultE = PrelimResultE;
 	 end
 
-	 flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM);
+	 flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM); // could let part of multiplication spill into Memory stage
 	 flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW);	 
 
       end else begin // no M instructions supported
diff --git a/wally-pipelined/testbench/common/instrTrackerTB.sv b/wally-pipelined/testbench/common/instrTrackerTB.sv
index 0283f6502..2b0ca7c50 100644
--- a/wally-pipelined/testbench/common/instrTrackerTB.sv
+++ b/wally-pipelined/testbench/common/instrTrackerTB.sv
@@ -13,5 +13,5 @@ module instrTrackerTB(
   instrNameDecTB ddec(InstrD, InstrDName);
   instrNameDecTB edec(InstrE, InstrEName);
   instrNameDecTB mdec(InstrM, InstrMName);
-  instrNameDecTB wdec(InstrW, InstrWName);
+  instrNameDecTB wdec(InstrW, InstrWName); // *** delete this because InstrW is deleted from IFU
 endmodule
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 318140769..50b447039 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -318,14 +318,14 @@ string tests32f[] = '{
   };
 
   string tests32m[] = '{
+    "rv32m/I-DIVU-01", "2000",
+    "rv32m/I-REMU-01", "2000",
+    "rv32m/I-DIV-01", "2000",
+    "rv32m/I-REM-01", "2000",
     "rv32m/I-MUL-01", "2000",
     "rv32m/I-MULH-01", "2000",
     "rv32m/I-MULHSU-01", "2000",
-    "rv32m/I-MULHU-01", "2000",
-    "rv32m/I-DIV-01", "2000",
-    "rv32m/I-DIVU-01", "2000",
-    "rv32m/I-REM-01", "2000",
-    "rv32m/I-REMU-01", "2000"
+    "rv32m/I-MULHU-01", "2000"
   };
 
   string tests32ic[] = '{
@@ -551,12 +551,12 @@ string tests32f[] = '{
         tests = tests32p;
       else begin
           tests = {tests32i, tests32p};//,tests32periph}; *** broken at the moment
-          if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};    
+          if (`C_SUPPORTED) tests = {tests, tests32ic};    
           else                       tests = {tests, tests32iNOc};
-          if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
           if (`F_SUPPORTED) tests = {tests32f, tests};
           if (`MEM_VIRTMEM) tests = {tests32mmu, tests};
           if (`A_SUPPORTED) tests = {tests32a, tests};
+          if (`M_SUPPORTED) tests = {tests32m, tests};
      end
     end
   end
@@ -607,9 +607,9 @@ string tests32f[] = '{
       end
       // read test vectors into memory
       memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"};
-      romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"};
+//      romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"};
       $readmemh(memfilename, dut.uncore.dtim.RAM);
-      $readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM);
+//      $readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM);
       ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"};
       ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"};
       $display("Read memfile %s", memfilename);