From afcddf7035f089ac379797155ed3ba776e36b169 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 19 Jul 2022 23:44:37 +0000
Subject: [PATCH 1/2] optimized zeros and replaced complex ?: with always_comb

---
 pipelined/src/fpu/fcvt.sv            | 21 ++++++---
 pipelined/src/fpu/fmashiftcalc.sv    |  4 --
 pipelined/src/fpu/negateintres.sv    |  9 +++-
 pipelined/src/fpu/resultsign.sv      | 25 ++++++++---
 pipelined/src/fpu/shiftcorrection.sv |  5 ++-
 pipelined/src/fpu/specialcase.sv     | 65 ++++++++++++++++++----------
 6 files changed, 85 insertions(+), 44 deletions(-)

diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index b9932523..9d7f2d62 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -103,7 +103,7 @@ module fcvt (
     // choose the input to the leading zero counter i.e. priority encoder
     //             int -> fp : | positive integer | 00000... (if needed) | 
     //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
+    assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
                              {Xm, {`CVTLEN-`NF{1'b0}}};
     assign LzcIn = LzcInFull[`CVTLEN-1:0];
     
@@ -125,9 +125,10 @@ module fcvt (
     //              - only shift fp -> fp if the intital value is denormalized
     //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
-                    ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : 
-                              (LeadingZeros);
+    always_comb
+        if(ToInt)                       ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
+        else if (ResDenormUf&~IntToFp)  ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
+        else                            ShiftAmt = LeadingZeros;
     
     ///////////////////////////////////////////////////////////////////////////
     // exp calculations
@@ -150,7 +151,9 @@ module fcvt (
         assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); 
 
     end else if (`FPSIZES == 2) begin
-        assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+        logic [`NE-2:0] NewBiasToFp;
+        assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+        assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 
 
     end else if (`FPSIZES == 3) begin
         logic [`NE-2:0] NewBiasToFp;
@@ -177,7 +180,7 @@ module fcvt (
     // select the old exponent
     //      int -> fp : largest bias + XLEN
     //      fp -> ??? : XExp
-    assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe;
+    assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
     
     // calculate CalcExp
     //      fp -> fp : 
@@ -222,7 +225,11 @@ module fcvt (
     //          - if 64-bit : check the msb of the 64-bit integer input and if it's signed
     //          - if 32-bit : check the msb of the 32-bit integer input and if it's signed
     //      - otherwise: the floating point input's sign
-    assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs;
+    always_comb
+        if(IntToFp)
+            if(Int64)   Cs = Int[`XLEN-1]&Signed;
+            else        Cs = Int[31]&Signed;
+        else            Cs = Xs;
 
 endmodule
 
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 79953b21..d598efb7 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -42,7 +42,6 @@ module fmashiftcalc(
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
     output logic [3*`NF+8:0]            FmaShiftIn        // is the sum zero
 );
-    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
     logic [`NE+1:0]             PreNormSumExp;       // the exponent of the normalized sum with the `FLEN bias
     logic [`NE+1:0] BiasCorr;
 
@@ -149,9 +148,6 @@ module fmashiftcalc(
     // Determine if the result is denormal
     // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
 
-    // Determine the shift needed for denormal results
-    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1;
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
diff --git a/pipelined/src/fpu/negateintres.sv b/pipelined/src/fpu/negateintres.sv
index dde515b9..7a696b37 100644
--- a/pipelined/src/fpu/negateintres.sv
+++ b/pipelined/src/fpu/negateintres.sv
@@ -42,7 +42,12 @@ module negateintres(
     // round and negate the positive res if needed
     assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
     
-    assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] :
-			              Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32];
+    always_comb
+        if(Signed)
+            if(Int64)   CvtNegResMsbs = CvtNegRes[`XLEN:`XLEN-1];
+            else        CvtNegResMsbs = CvtNegRes[32:31];
+        else
+            if(Int64)   CvtNegResMsbs = CvtNegRes[`XLEN+1:`XLEN];
+            else        CvtNegResMsbs = CvtNegRes[33:32];
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index b8019b98..c2912ece 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -46,11 +46,21 @@ module resultsign(
     logic Zeros;
     logic Infs;
 
-    // Determine the sign if the sum is zero
-    //      if cancelation then 0 unless round to -infinity
-    //      if multiply then Psgn
-    //      otherwise psign
-    assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
+    // The IEEE754-2019 standard specifies:
+    //      - the sign of an exact zero sum (with operands of different signs) should be positive unless rounding toward negative infinity
+    //      - when the exact result of an FMA operation is non-zero, but is zero due to rounding, use the sign of the exact result
+    //      - if x = +0 or -0 then x+x=x and x-(-x)=x
+    //      - the sign of a product is the exclusive or of the operands' signs
+    // Zero sign will only be selected if:
+    //      - P=Z and a cancellation occurs - exact zero
+    //      - Z is zero and P is zero - exact zero
+    //      - P is killed and Z is zero - Psgn
+    //      - Z is killed and P is zero - impossible
+    // Zero sign calculation:
+    //      - if a multiply operation is done, then use the product's sign (Ps)
+    //      - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign)
+    //      - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign
+    assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
 
 
     // is the result negitive
@@ -58,6 +68,9 @@ module resultsign(
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
     assign Infs = ZInf ? FmaAs : FmaPs;
-    assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;
+    always_comb
+        if(InfIn&FmaOp) Ws = Infs;
+        else if(FmaSZero&FmaOp) Ws = Zeros;
+        else Ws = Ms;
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index 50cffb07..514edbee 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -55,7 +55,10 @@ module shiftcorrection(
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
     assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
-    assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    always_comb
+        if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
+        else if (DivOp&~DivResDenorm)   Mf = CorrQuotShifted;
+        else                            Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
     assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv
index 3c28eae2..6014962a 100644
--- a/pipelined/src/fpu/specialcase.sv
+++ b/pipelined/src/fpu/specialcase.sv
@@ -95,9 +95,14 @@ module specialcase(
         end else begin 
             assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
         end
-        
-        assign OfRes =  OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} :
-                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
+
+        always_comb
+            if(OutFmt)
+                if(OfResMax)    OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
+                else            OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
+            else
+                if(OfResMax)    OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
+                else            OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
         assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
         assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
 
@@ -234,20 +239,21 @@ module specialcase(
     assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
     assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
     // output infinity with result sign if divide by zero
-    if(`IEEE754) begin
-        assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
-                         YNaN&~CvtOp ? YNaNRes :
-                         ZNaN&FmaOp ? ZNaNRes :
-                         Invalid ? InvalidRes : 
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end else begin
-        assign PostProcRes = NaNIn|Invalid ? InvalidRes :
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end
+    if(`IEEE754)
+        always_comb
+            if(XNaN&~(IntToFp&CvtOp))   PostProcRes = XNaNRes;
+            else if(YNaN&~CvtOp)        PostProcRes = YNaNRes;
+            else if(ZNaN&FmaOp)         PostProcRes = ZNaNRes;
+            else if(Invalid)            PostProcRes = InvalidRes;
+            else if(SelOfRes)           PostProcRes = OfRes;
+            else if(KillRes)            PostProcRes = UfRes;
+            else                        PostProcRes = NormRes;
+    else
+        always_comb
+            if(NaNIn|Invalid)           PostProcRes = InvalidRes;
+            else if(SelOfRes)           PostProcRes = OfRes;
+            else if(KillRes)            PostProcRes = UfRes;
+            else                        PostProcRes = NormRes;
 
     ///////////////////////////////////////////////////////////////////////////////////////
     //
@@ -272,10 +278,17 @@ module specialcase(
     //        unsigned | 2^32-1 | 2^64-1 |
     //
     //      other: 32 bit unsinged res should be sign extended as if it were a signed number
-    assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
-                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
-                               Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive
-                                              {`XLEN{1'b1}};// unsigned positive
+    always_comb
+        if(Signed)
+            if(Xs&~XNaN)    // signed negative
+                if(Int64)   OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
+                else        OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
+            else            // signed positive
+                if(Int64)   OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
+                else        OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
+        else
+            if(Xs&~XNaN)    OfIntRes = {`XLEN{1'b0}}; // unsigned negative
+            else            OfIntRes = {`XLEN{1'b1}}; // unsigned positive
 
 
     // select the integer output
@@ -284,7 +297,11 @@ module specialcase(
     //          - if rounding and signed opperation and negitive input, output -1
     //          - otherwise output a rounded 0
     //      - otherwise output the normal res (trmined and sign extended if nessisary)
-    assign FCvtIntRes = IntInvalid ?  OfIntRes :
-			            CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
-                        Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
+    always_comb
+        if(IntInvalid)          FCvtIntRes = OfIntRes;
+        else if(CvtCe[`NE]) 
+            if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
+            else                FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
+        else if(Int64)          FCvtIntRes = CvtNegRes[`XLEN-1:0];
+        else                    FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
 endmodule
\ No newline at end of file

From db39a05abc7bd3f7519d62f44413dfdd6c3f8b45 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 20 Jul 2022 01:36:25 +0000
Subject: [PATCH 2/2] small changes

---
 pipelined/src/fpu/divsqrt.sv |  2 +-
 pipelined/src/fpu/srt.sv     | 12 ++++++------
 pipelined/srt/srt.sv         | 29 +++++++++++++++++------------
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index cbf7f95f..ffc60026 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -41,7 +41,7 @@ module divsqrt(
   input  logic XNaNE, YNaNE, 
   input  logic DivStartE, 
   input  logic StallM,
-  input logic StallE,
+  input  logic StallE,
   output logic DivStickyM,
   output logic DivBusy,
   output logic DivDone,
diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv
index 9e031511..ee5ae9a3 100644
--- a/pipelined/src/fpu/srt.sv
+++ b/pipelined/src/fpu/srt.sv
@@ -34,18 +34,18 @@ module srt(
   input  logic clk,
   input  logic DivStart, 
   input  logic DivBusy, 
-  input logic  [`FMTBITS-1:0] FmtE,
+  input  logic [`FMTBITS-1:0] FmtE,
   input  logic [`NE-1:0] Xe, Ye,
   input  logic XZeroE, YZeroE, 
-  input logic [`DIVLEN-1:0] X,
-  input logic [`DIVLEN-1:0] Dpreproc,
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  input logic NegSticky,
+  input  logic [`DIVLEN-1:0] X,
+  input  logic [`DIVLEN-1:0] Dpreproc,
+  input  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  input  logic NegSticky,
   output logic [`QLEN-1-(`RADIX/4):0] Quot,
   output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  StickyWSA,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
-  output logic  [`NE+1:0] DivCalcExpM,
+  output logic [`NE+1:0] DivCalcExpM,
   output logic [`XLEN-1:0] Rem
 );
 
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 949335bf..13a59d84 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -55,7 +55,7 @@ module srt (
   logic                       qp, qz, qn; // quotient is +1, 0, or -1
   logic [`NE-1:0]             calcExp;
   logic                       calcSign;
-  logic [`DIVLEN+3:0]         X, Dpreproc, C, F, AddIn;
+  logic [`DIVLEN+3:0]         X, Dpreproc, C, F, S, SM, AddIn;
   logic [`DIVLEN+3:0]         WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
   logic                       intSign;
@@ -90,8 +90,9 @@ module srt (
   // If only implementing division, use divide otfc
   // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
   // otherwise use sotfc
-  creg   sotfcC(clk, Start, C);
-  sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F);
+  creg   sotfcC(clk, Start, Sqrt, C);
+  sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM);
+  fsel2 fsel(qp, qn, C, S, SM, F);
 
   // Adder input selection
   assign AddIn = Sqrt ? F : Dsel;
@@ -214,11 +215,16 @@ module fsel2 (
   // Generate for both positive and negative bits
   assign FP = ~S & C;
   assign FN = SM | (C & (~C << 2));
-  assign FZ = {(`DIVLEN+4){1'b0}};
+  assign FZ = '0;
 
   // Choose which adder input will be used
 
-  assign F = sp ? FP : (sn ? FN : FZ);
+  always_comb
+    if (sp)       F = FP;
+    else if (sn)  F = FN;
+    else          F = FZ;
+
+  // assign F = sp ? FP : (sn ? FN : FZ);
 
 endmodule
 
@@ -266,17 +272,18 @@ module sotfc2(
   input  logic         clk,
   input  logic         Start,
   input  logic         sp, sn,
+  input  logic         Sqrt,
   input  logic [`DIVLEN+3:0] C,
   output logic [`DIVLEN-2:0] Sq,
-  output logic [`DIVLEN+3:0] F
+  output logic [`DIVLEN+3:0] S, SM
 );
   //  The on-the-fly converter transfers the square root 
   //  bits to the quotient as they come.
   //  Use this otfc for division and square root.
-  logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
+  logic [`DIVLEN+3:0] SNext, SMNext, SMux;
 
   flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
-  mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux);
+  mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux);
   flop #(`DIVLEN+4) Sreg(clk, SMux, S);
 
   always_comb begin
@@ -292,9 +299,6 @@ module sotfc2(
     end 
   end
   assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
-
-  fsel2 fsel(sp, sn, C, S, SM, F);
-
 endmodule
 
 //////////////////////////
@@ -302,11 +306,12 @@ endmodule
 //////////////////////////
 module creg(input  logic clk,
             input  logic Start,
+            input  logic Sqrt,
             output logic [`DIVLEN+3:0] C
 );
   logic [`DIVLEN+3:0] CMux;
 
-  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux);
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux);
   flop #(`DIVLEN+4) cflop(clk, CMux, C);
 endmodule