diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index 19679aa55..9af93fb37 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -55,7 +55,6 @@ module fdivsqrt(
 //   output logic [`XLEN-1:0] RemM,
 );
 
-  logic [`DIVb+3:0]  NextWSN, NextWCN;
   logic [`DIVb+3:0]  WS, WC;
   logic [`DIVb+3:0] X;
   logic [`DIVN-2:0]  D; // U0.N-1
@@ -77,7 +76,7 @@ module fdivsqrt(
     .XInfE, .YInfE, .WZero, .SpecialCaseM);
   fdivsqrtiter fdivsqrtiter(
     .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, 
-    .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, 
+    .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
     .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
     .DivBusy);
   fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZero, .DivSM);
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
index d13d706f4..5c067796c 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -41,7 +41,6 @@ module fdivsqrtiter(
   input  logic [`DIVb+3:0] X,
   input  logic [`DIVN-2:0] Dpreproc,
   output logic [`DIVN-2:0]  D, // U0.N-1
-  output logic [`DIVb+3:0]  NextWSN, NextWCN,
   output logic [`DIVb:0] FirstU, FirstUM,
   output logic [`DIVb+1:0] FirstC,
   output logic             Firstun,
@@ -56,12 +55,12 @@ module fdivsqrtiter(
 // U/UM should be 1.b so b+1 bits or b:0
 // C needs to be the lenght of the final fraction 0.b so b or b-1:0
  /* verilator lint_off UNOPTFLAT */
-  logic [`DIVb+3:0]  WSA[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]  WCA[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]  WS[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]  WC[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb:0] U[`DIVCOPIES-1:0]; // U1.b
-  logic [`DIVb:0] UM[`DIVCOPIES-1:0];// 1.b
+  logic [`DIVb+3:0]  WSNext[`DIVCOPIES-1:0]; // Q4.b
+  logic [`DIVb+3:0]  WCNext[`DIVCOPIES-1:0]; // Q4.b
+  logic [`DIVb+3:0]  WS[`DIVCOPIES:0]; // Q4.b
+  logic [`DIVb+3:0]  WC[`DIVCOPIES:0]; // Q4.b
+  logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
+  logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b
   logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b
   logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b
   logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
@@ -79,31 +78,35 @@ module fdivsqrtiter(
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
-  // Otherwise, the divisor is retained and the partial remainder
-  // is fed back for the next iteration.
-  //  - when the start signal is asserted X and 0 are loaded into WS and WC
-  //  - otherwise load WSA into the flipflop
-  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
-  //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  assign NextWSN = WSA[`DIVCOPIES-1] << `LOGR;
-  assign NextWCN = WCA[`DIVCOPIES-1] << `LOGR;
-
-  // Initialize C to -1 for sqrt and -R for division
-  logic [1:0] initCSqrt, initCDiv2, initCDiv4, initCUpper;
-  assign initCSqrt = 2'b11; // -1
-  assign initCDiv2 = 2'b10; // -2
-  assign initCDiv4 = 2'b00; // -4
-  assign initCUpper = SqrtE ? initCSqrt : (`RADIX == 4) ? initCDiv4 : initCDiv2;
-  assign initC = {initCUpper, {`DIVb{1'b0}}};
-
-  mux2   #(`DIVb+4) wsmux(NextWSN, X, DivStartE, WSN);
+  // Otherwise, the divisor is retained and the residual and result
+  // are fed back for the next iteration.
+ 
+  // Residual WS/WC registers/initialization mux
+  mux2   #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN);
+  mux2   #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN);
   flopen   #(`DIVb+4) wsflop(clk, DivStartE|DivBusy, WSN, WS[0]);
-  mux2   #(`DIVb+4) wcmux(NextWCN, '0, DivStartE, WCN);
   flopen   #(`DIVb+4) wcflop(clk, DivStartE|DivBusy, WCN, WC[0]);
-  flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D);
+
+  // UOTFC Result U and UM registers/initialization mux
+  // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
+  assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
+  assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
+  mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux);
+  mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux);
+  flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]);
+  flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]);
+
+  // C register/initialization mux
+  // Initialize C to -1 for sqrt and -R for division
+  logic [1:0] initCUpper;
+  assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
+  assign initC = {initCUpper, {`DIVb{1'b0}}};
   mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStartE, CMux); 
   flopen #(`DIVb+2) cflop(clk, DivStartE|DivBusy, CMux, C[0]);
 
+  // Divisor register
+  flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D);
+
   // Divisor Selections
   //  - choose the negitive version of what's being selected
   //  - D is only the fraction
@@ -113,37 +116,29 @@ module fdivsqrtiter(
     assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
   end
 
+  // Instantiate k=DIVCOPIES copies of the recurrence logic
   genvar i;
   generate
     for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
       if (`RADIX == 2) begin: stage
         fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtM,
-        .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), 
+        .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), 
         .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
       end else begin: stage
         logic j1;
         assign j1 = (i == 0 & ~C[0][`DIVb-1]);
         fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1,
-        .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), 
+        .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), 
         .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
       end
-      if(i<(`DIVCOPIES-1)) begin 
-        assign WS[i+1] = WSA[i] << `LOGR;
-        assign WC[i+1] = WCA[i] << `LOGR;
-        assign U[i+1] = UNext[i];
-        assign UM[i+1] = UMNext[i];
-      end
+      assign WS[i+1] = WSNext[i];
+      assign WC[i+1] = WCNext[i];
+      assign U[i+1]  = UNext[i];
+      assign UM[i+1] = UMNext[i];
     end
   endgenerate
 
-  // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
-  assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
-  assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
-  mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux);
-  mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux);
-  flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]);
-  flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]);
-  
+  // Send values from start of cycle for postprocessing
   assign FirstWS = WS[0];
   assign FirstWC = WC[0];
   assign FirstU = U[0];
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv
index 4379724ff..73f4e4425 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv
@@ -31,19 +31,18 @@
 `include "wally-config.vh"
 
 module fdivsqrtqsel4 (
-  input logic [`DIVN-2:0] D,
+  input logic [2:0] Dmsbs,
   input logic [4:0] Smsbs,
-  input logic [`DIVb+3:0] WS, WC,
+  input logic [7:0] WSmsbs, WCmsbs,
   input logic Sqrt, j1,
   output logic [3:0] udigit
 );
 	logic [6:0] Wmsbs;
 	logic [7:0] PreWmsbs;
-	logic [2:0] Dmsbs, A;
+	logic [2:0] A;
 
-	assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4];
+	assign PreWmsbs = WCmsbs + WSmsbs;
 	assign Wmsbs = PreWmsbs[7:1];
-	assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}};
 	// D = 0001.xxx...
 	// Dmsbs = |   |
   // W =      xxxx.xxx...
@@ -51,6 +50,7 @@ module fdivsqrtqsel4 (
 
 	logic [3:0] USel4[1023:0];
 
+  // Prepopulate selection table; this is constant at compile time
   always_comb begin 
     integer a, w, i, w2;
     for(a=0; a<8; a++)
@@ -101,12 +101,15 @@ module fdivsqrtqsel4 (
         endcase
       end
   end
+
+  // Select A
   always_comb
     if (Sqrt) begin 
       if (j1) A = 3'b101;
       else if (Smsbs == 5'b10000) A = 3'b111;
       else A = Smsbs[2:0];
     end else A = Dmsbs;
+
+  // Select quotient digit from lookup table based on A and W
 	assign udigit = USel4[{A,Wmsbs}];
-	
 endmodule
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv
new file mode 100644
index 000000000..de4c22a18
--- /dev/null
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv
@@ -0,0 +1,93 @@
+///////////////////////////////////////////
+// fdivsqrtqsel4cmp.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu 
+// Modified:13 January 2022
+//
+// Purpose: Comparator-based Radix 4 Quotient Digit Selection
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module fdivsqrtqsel4cmp ( // radix-4 digit selection: compares the residual msbs against four thresholds chosen by A
+  input logic [2:0] Dmsbs, // used directly as the threshold index A when Sqrt is 0
+  input logic [4:0] Smsbs, // used to derive the threshold index A when Sqrt is 1
+  input logic [7:0] WSmsbs, WCmsbs, // msbs of the two residual words (WS and WC); summed below
+  input logic Sqrt, j1, // Sqrt: 1 selects the square-root choice of A; j1: when set with Sqrt, forces A = 3'b101
+  output logic [3:0] udigit // selected digit: 1000 = 2, 0100 = 1, 0000 = 0, 0010 = -1, 0001 = -2
+);
+	logic [6:0] Wmsbs; // residual estimate that is compared (as signed) against the thresholds
+	logic [7:0] PreWmsbs; // 8-bit sum of WCmsbs and WSmsbs
+	logic [2:0] A; // index into the mks2/mks1 threshold tables
+
+	assign PreWmsbs = WCmsbs + WSmsbs; // result is 8 bits wide, so any carry out of bit 7 is discarded
+	assign Wmsbs = PreWmsbs[7:1]; // keep the upper 7 bits of the sum; the LSB is dropped
+	// D = 0001.xxx...
+	// Dmsbs = |   |
+  // W =      xxxx.xxx...
+	// Wmsbs = |        |
+
+  logic [6:0] mk2, mk1, mk0, mkm1; // thresholds for the current A: lower bounds for digits 2, 1, 0 and -1
+  logic [6:0] mks2[7:0], mks1[7:0]; // constant tables indexed by A: mks2 feeds mk2/mkm1, mks1 feeds mk1/mk0
+
+  // Prepopulate the mks2 and mks1 tables of selection constants (one entry per value of A)
+  assign mks2[0] = 12;
+  assign mks2[1] = 14;
+  assign mks2[2] = 16;
+  assign mks2[3] = 17;
+  assign mks2[4] = 18;
+  assign mks2[5] = 20;
+  assign mks2[6] = 22;
+  assign mks2[7] = 23;
+  assign mks1[0] = 4;
+  assign mks1[1] = 4;
+  assign mks1[2] = 6;
+  assign mks1[3] = 6;
+  assign mks1[4] = 6;
+  assign mks1[5] = 8; // is the logic any cheaper if this is a 6?
+  assign mks1[6] = 8;
+  assign mks1[7] = 8;
+
+  // Choose A for the current operation: derived from Smsbs (or forced by j1) for sqrt, Dmsbs otherwise
+ always_comb
+    if (Sqrt) begin // square root
+      if (j1) A = 3'b101; // j1 overrides Smsbs
+      else if (Smsbs == 5'b10000) A = 3'b111; // special case: this Smsbs value maps to the last table row
+      else A = Smsbs[2:0]; // otherwise the low three bits of Smsbs pick the row
+    end else A = Dmsbs; // not sqrt: Dmsbs picks the row directly
+
+  // Choose selection constants based on A
+  assign mk2 = mks2[A]; // lower bound for digit 2
+  assign mk1 = mks1[A]; // lower bound for digit 1
+  assign mk0 = -mks1[A]; // lower bound for digit 0: negated mks1 entry, kept as a 7-bit two's-complement value
+  assign mkm1 = (A == 3'b000) ? -13 : -mks2[A]; // lower bound for digit -1: negated mks2 entry, except -13 (not -12) when A is 0 (asymmetry in table)
+ 
+  // Compare residual W to selection constants to choose digit (signed 7-bit compares, largest threshold first)
+  always_comb 
+    if ($signed(Wmsbs) >= $signed(mk2)) udigit = 4'b1000; // choose 2
+    else if ($signed(Wmsbs) >= $signed(mk1)) udigit = 4'b0100; // choose 1
+    else if ($signed(Wmsbs) >= $signed(mk0)) udigit = 4'b0000; // choose 0
+    else if ($signed(Wmsbs) >= $signed(mkm1)) udigit = 4'b0010; // choose -1
+    else udigit = 4'b0001; // choose -2 (Wmsbs is below every threshold)
+endmodule
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
index 987f23576..8ed1664af 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
@@ -41,7 +41,7 @@ module fdivsqrtstage2 (
   output logic un,
   output logic [`DIVb+1:0] CNext,
   output logic [`DIVb:0] UNext, UMNext, 
-  output logic [`DIVb+3:0]  WSA, WCA
+  output logic [`DIVb+3:0]  WSNext, WCNext
 );
  /* verilator lint_on UNOPTFLAT */
 
@@ -49,8 +49,7 @@ module fdivsqrtstage2 (
   logic up, uz;
   logic [`DIVb+3:0] F;
   logic [`DIVb+3:0] AddIn;
-
-  assign CNext = {1'b1, C[`DIVb+1:1]};
+  logic [`DIVb+3:0]  WSA, WCA;
 
   // Qmient Selection logic
   // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
@@ -61,8 +60,11 @@ module fdivsqrtstage2 (
 	// 0010 = -1
 	// 0001 = -2
   fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
+
+  // Sqrt F generation
   fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F);
 
+  // Divisor multiple
   always_comb
     if      (up) Dsel = DBar;
     else if (uz) Dsel = '0; // qz
@@ -72,7 +74,13 @@ module fdivsqrtstage2 (
   //  WSA, WCA = WS + WC - qD
   assign AddIn = SqrtM ? F : Dsel;
   csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtM, WSA, WCA);
+  assign WSNext = WSA << 1;
+  assign WCNext = WCA << 1;
 
+  // Shift thermometer code C
+  assign CNext = {1'b1, C[`DIVb+1:1]};
+
+  // Unified On-The-Fly Converter to accumulate result
   fdivsqrtuotfc2 uotfc2(.up, .uz, .C(CNext), .U, .UM, .UNext, .UMNext);
 endmodule
 
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
index e463762a2..05792293c 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@@ -30,7 +30,6 @@
 
 `include "wally-config.vh"
 
-/* verilator lint_off UNOPTFLAT */
 module fdivsqrtstage4 (
   input logic [`DIVN-2:0] D,
   input logic [`DIVb+3:0]  DBar, D2, DBar2,
@@ -41,17 +40,18 @@ module fdivsqrtstage4 (
   input logic SqrtM, j1,
   output logic un,
   output logic [`DIVb:0] UNext, UMNext, 
-  output logic [`DIVb+3:0]  WSA, WCA
+  output logic [`DIVb+3:0]  WSNext, WCNext
 );
- /* verilator lint_on UNOPTFLAT */
 
   logic [`DIVb+3:0]  Dsel;
   logic [3:0]     udigit;
   logic [`DIVb+3:0] F;
   logic [`DIVb+3:0] AddIn;
   logic [4:0] Smsbs;
+  logic [2:0] Dmsbs;
+  logic [7:0] WCmsbs, WSmsbs;
   logic CarryIn;
-  assign CNext = {2'b11, C[`DIVb+1:2]};
+  logic [`DIVb+3:0]  WSA, WCA;
 
   // Digit Selection logic
   // u encoding:
@@ -61,28 +61,40 @@ module fdivsqrtstage4 (
 	// 0010 = -1
 	// 0001 = -2
   assign Smsbs = U[`DIVb:`DIVb-4];
-  fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .udigit);
+  assign Dmsbs = D[`DIVN-2:`DIVN-4];
+  assign WCmsbs = WC[`DIVb+3:`DIVb-4];
+  assign WSmsbs = WS[`DIVb+3:`DIVb-4];
+
+  fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtM), .j1, .udigit);
+  assign un = 0; // unused for radix 4
+
+  // F generation logic
   fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
 
+  // Divisor multiple logic
   always_comb
-  case (udigit)
-    4'b1000: Dsel = DBar2;
-    4'b0100: Dsel = DBar;
-    4'b0000: Dsel = '0;
-    4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
-    4'b0001: Dsel = D2;
-    default: Dsel = 'x;
-  endcase
+    case (udigit)
+      4'b1000: Dsel = DBar2;
+      4'b0100: Dsel = DBar;
+      4'b0000: Dsel = '0;
+      4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
+      4'b0001: Dsel = D2;
+      default: Dsel = 'x;
+    endcase
 
-  // Partial Product Generation
-  //  WSA, WCA = WS + WC - qD
+  // Residual Update
+  //  {WS, WC}Next = (WS + WC - qD or F) << 2
   assign AddIn = SqrtM ? F : Dsel;
   assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D 
   csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
- 
-  fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
+  assign WSNext = WSA << 2;
+  assign WCNext = WCA << 2;
 
-  assign un = 0; // unused for radix 4
+  // Shift thermometer code C
+  assign CNext = {2'b11, C[`DIVb+1:2]};
+ 
+  // On-the-fly converter to accumulate result
+  fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
 endmodule