Merge pull request #416 from stineje/main

Update to Wally for test float fixes and other ancillary quality improvements
2025-02-11 06:05:49 +00:00 · 2023-10-03 09:36:17 -05:00 · 2023-10-03 09:36:17 -05:00 · b0ce2cac1c
commit b0ce2cac1c
parent 5de6b466b7 0e5b8b9179
11 changed files with 1017 additions and 87 deletions
--- a/bin/wally-tool-chain-install.sh
+++ b/bin/wally-tool-chain-install.sh
@ -120,8 +120,8 @@ sudo apt-get install -y perl g++ ccache help2man libgoogle-perftools-dev numactl
 sudo apt-get install -y libfl2  libfl-dev  # Ubuntu only (ignore if gives error)
 cd $RISCV
 git clone https://github.com/verilator/verilator   # Only first time
-unsetenv VERILATOR_ROOT  # For csh; ignore error if on bash
-unset VERILATOR_ROOT  # For bash
+# unsetenv VERILATOR_ROOT  # For csh; ignore error if on bash
+unset VERILATOR_ROOT     # For bash
 cd verilator
 git pull         # Make sure git repository is up-to-date
 git checkout master      # Use development branch (e.g. recent bug fixes)
@ -157,6 +157,8 @@ opam install sail -y
 eval $(opam config env)
 git clone https://github.com/riscv/sail-riscv.git
 cd sail-riscv
+# For now, use checkout that is stable for Wally
+git checkout 72b2516d10d472ac77482fd959a9401ce3487f60
 make -j ${NUM_THREADS}
 ARCH=RV32 make -j ${NUM_THREADS}
 sudo ln -sf $RISCV/sail-riscv/c_emulator/riscv_sim_RV64 /usr/bin/riscv_sim_RV64
--- a/docs/divsqrt_tex/div2.tex
+++ b/docs/divsqrt_tex/div2.tex
@ -0,0 +1,123 @@
+\documentclass[12pt]{article}
+
+\usepackage{amssymb, amsmath, amsfonts, amsthm, graphicx, tcolorbox}
+
+\usepackage{arydshln}
+
+\parindent  = 0in
+
+\pagestyle{empty}
+
+%==========
+%==========
+
+\begin{document}
+
+\begin{center}
+\begin{tabular}{cccc}
+    Initialization&$D$&$0001.1010\ 000$&\\
+    &$-D=\overline{D}+1$&$1110.0101\ 111$&(+ 1 ulp)\\
+    &&&\\
+    &$WS_{-1}=X$&$0001.0000\ 010$&\\
+    &$WC_{-1}$&$0000.0000\ 000$&\\
+    \hdashline\\
+    Step 0:&$WS_{-1}$&$0001.0000\ 010$&\\
+    &$WC_{-1}$&$0000.0000\ 00\mathbf{1}$&($W_{msbs}=0001\ \text{so}\ q_0=1$)\\
+    &$-q_0D$&$1110.0101\ 111$&\\
+    \cline{2-3}
+    &$sum$&$1111.0101\ 100$&$\ll1$\\
+    &$carry$&$0000.0000\ 110$&$\ll1$\\
+    \hdashline\\
+    Step 1:&$WS_0$&$1110.1011\ 000$&\\
+    &$WC_0$&$0000.0001\ 10\mathbf{0}$&($W_{msbs}=1110\ \text{so}\ q_1=-1$)\\
+    &$-q_1D$&$0001.1010\ 000$&\\
+    \cline{2-3}
+    &$sum$&$1111.0000\ 100$&$\ll1$\\
+    &$carry$&$0001.0110\ 000$&$\ll1$\\
+    \hdashline\\
+    Step 2:&$WS_1$&$1110.0001\ 000$&\\
+    &$WC_1$&$0010.1100\ 00\mathbf{1}$&($W_{msbs}=0000\ \text{so}\ q_2=1$)\\
+    &$-q_2D$&$1110.0101\ 111$&\\
+    \cline{2-3}
+    &$sum$&$0010.1000\ 110$&$\ll1$\\
+    &$carry$&$1100.1010\ 010$&$\ll1$\\
+    \hdashline\\
+    Step 3:&$WS_2$&$0101.0001\ 100$&\\
+    &$WC_2$&$1001.0100\ 10\mathbf{0}$&($W_{msbs}=1110\ \text{so}\ q_3=-1$)\\
+    &$-q_3D$&$0001.1010\ 000$&\\
+    \cline{2-3}
+    &$sum$&$1101.1111\ 000$&\\
+    &$carry$&$0010.0001\ 000$&$sum+carry=0$, terminate.\\
+    \hdashline\\
+    Terminate&Quotient&0.101
+\end{tabular}
+\end{center}
+
+\vfill
+\eject
+
+X = 1.0110\ 011 (179/128)
+
+D = 1.0011\ 000 (152/128)
+
+Q = 1.0010\ 1101\ 0
+
+D[1.3] = 1.001, so we use the ``1.001'' column of chart 13.X. This means we select a quotient bit of 2 if the partial remainder is greater than or equal to 3.5, a quotient bit of 1 if the partial remainder is greater than or equal to 1.0, a zero if the partial remainder is greater than or equal to $-1.5$, $-1$ if the partial remainder is greater than or equal to $-3.75$, and $-2$ otherwise.
+\begin{center}
+\begin{tabular}{cccc}
+    Initialization&$D$&$0001.0011\ 000$&\\
+    &$2D$&$0010.0110\ 000$&\\
+    &$-D=\overline{D}+1$&$1110.1100\ 111$&(+ 1 ulp)\\
+    &$-2D=\overline{2D}+1$&$1101.1001\ 111$&(+ 1 ulp)\\
+    &&&\\
+    &$X=WS$&$0001.0110\ 011$&\\
+    &$WC$&$0000.0000\ 000$&\\
+    \hdashline\\
+    Step 4:&$WS$&$0001.0110\ 011$&\\
+    &$WC$&$0000.0000\ 00\mathbf{1}$&($RW_{msbs}=0001.010\ \text{so}\ q_4=1$)\\
+    &$-q_4D$&$1110.1100\ 111$&\\
+    \cline{2-3}
+    &$WS$&$1111.1010\ 101$&$\ll2$\\
+    &$WC$&$0000.1000\ 110$&$\ll2$\\
+    \hdashline\\
+    Step 3:&$WS$&$1110.1010\ 100$&\\
+    &$WC$&$0010.0011\ 000$&($RW_{msbs}=0000.110\ \text{so}\ q_3=1$)\\
+    &$-q_3D$&$0000.0000\ 000$&\\
+    \cline{2-3}
+    &$WS$&$1100.1001\ 100$&$\ll2$\\
+    &$WC$&$0100.0100\ 000$&$\ll2$\\
+    \hdashline\\
+    Step 2:&$WS$&$0010.0110\ 000$&\\
+    &$WC$&$0001.0000\ 00\mathbf{1}$&($RW_{msbs}=0011.010\ \text{so}\ q_2=-1$)\\
+    &$-q_2D$&$1110.0101\ 111$&\\
+    \cline{2-3}
+\end{tabular}
+\end{center}
+
+page 269 306
+
+\vfill 
+\eject
+
+{\large\bfseries
+Math for the recurrence relation}
+
+**going to have to change notation for sure, change the subscripts for steps and might have to get rid of some exponents**
+\begin{align*}
+    w[j+1] &= r^{j+1}\big(x-S[j+1]^2\big)\\
+    &= r^{j+1}\big(x-(S[j]+s_{j+1}r^{-(j+1)})^2\big)\\
+    &= r^{j+1}x-r^{j+1}\big(S[j]^2+2S[j]s_{j+1}r^{-(j+1)}+s^2_{j+1}r^{-2(j+1)}\big)\\
+    &= r^{j+1}\big(x-S[j]^2\big)-\big(2S[j]s_{j+1}+s_{j+1}^2r^{-(j+1)}\big)\\
+    &= rw[j]-\big(2S[j]s_{j+1}+s_{j+1}^2r^{-(j+1)}\big)\\
+    &= rw[j]+F[j]
+\end{align*}
+where
+\begin{align*}
+    F[j]=-\big(2S[j]s_{j+1}+s_{j+1}^2r^{-(j+1)}\big)
+\end{align*}
+
+Since there is a term of $S$ in the expression of $F$, we must come up with a way to represent $S$ using only zeros and ones, rather than using the bit set $\{-a,\ldots,a\}$. This is done using on-the-fly conversion just as we did to compute the quotient for the divider. We keep a running copy of $S$, but we also keep the value $SM=S-1$. The logic is still the same for computing $S$ and $SM$ on the next step; see figure 13.15. 
+
+Now that $S$ is in a form such that we can use it in a CSA, we need to compute $F$. To do so, 
+
+\end{document}
--- a/docs/divsqrt_tex/div4.tex
+++ b/docs/divsqrt_tex/div4.tex
@ -0,0 +1,84 @@
+\documentclass[12pt]{article}
+
+\usepackage{amssymb, amsmath, amsfonts, amsthm, graphicx, tcolorbox}
+
+\usepackage{arydshln}
+
+\parskip = .2in
+\parindent  = 0in
+
+\pagestyle{empty}
+
+%==========
+%==========
+
+\begin{document}
+
+
+X = 1.0000\ 1101 (269/256)
+
+D = 1.0011\ 0110 (310/256)
+
+Q = 0.1101\ 1110 (222/256)
+
+D[1.3] = 1.001, so we use the ``1.001'' column of chart 13.X. This means we select a quotient bit of 2 if the partial remainder is greater than or equal to 3.5, a quotient bit of 1 if the partial remainder is greater than or equal to 1.0, a zero if the partial remainder is greater than or equal to $-1.5$, $-1$ if the partial remainder is greater than or equal to $-3.75$, and $-2$ otherwise.
+
+{\small
+\begin{center}
+\begin{tabular}{cccc}
+    Initialization&$D$&$0001.0011\ 0110\ 00$&\\
+    &$2D$&$0010.0110\ 1100\ 00$&\\
+    &$-D=\overline{D}+1$&$1110.1100\ 1001\ 11$&(+ 1 ulp)\\
+    &$-2D=\overline{2D}+1$&$1101.1001\ 0011\ 11$&(+ 1 ulp)\\
+    &&&\\
+    &$WS_{-1}=X$&$0001.0000\ 1101\ 00$&\\
+    &$WC_{-1}$&$0000.0000\ 0000\ 00$&\\
+    \hdashline\\
+    Step 0: &$WS_{-1}   $&$0001.0000\ 1101\ 00$&\\
+            &$WC_{-1}   $&$0000.0000\ 0000\ 0\mathbf{1}$&($W_{msbs}=0001.000\ \text{so}\ q_0=1$)\\
+            &$-q_0D     $&$1110.1100\ 1001\ 11$&\\
+    \cline{2-3}
+            &$sum       $&$1111.1100\ 0100\ 10$&$\ll2$\\
+            &$carry     $&$0000.0001\ 0010\ 10$&$\ll2$\\
+    \hdashline\\
+    Step 1: &$WS_0      $&$1111.0001\ 0010\ 00$&\\
+            &$WC_0      $&$0000.0100\ 1010\ 0\mathbf{0}$&($W_{msbs}=1111.010\ \text{so}\ q_1=-1$)\\
+            &$-q_1D     $&$0001.0011\ 0110\ 00$&\\
+    \cline{2-3}
+            &$sum       $&$1110.0110\ 1110\ 00$&$\ll2$\\
+            &$carry     $&$0010.0010\ 0100\ 00$&$\ll2$\\
+    \hdashline\\
+    Step 2: &$WS_1      $&$1001.1011\ 1000\ 00$&\\
+            &$WC_1      $&$1000.1001\ 0000\ 0\mathbf{1}$&($W_{msbs}=0010.010\ \text{so}\ q_2=2$)\\
+            &$-q_2D     $&$1101.1001\ 0011\ 11$\\
+    \cline{2-3}
+            &$sum       $&$1100.1011\ 1011\ 10$&$\ll2$\\
+            &$carry     $&$0011.0010\ 0000\ 10$&$\ll2$\\
+    \hdashline\\
+    Step 3: &$WS_2      $&$0010.1110\ 1110\ 00$&\\
+            &$WC_2      $&$1100.1000\ 0010\ 0\mathbf{0}$&($W_{msbs}=1111.011\ \text{so}\ q_3=-1$)\\
+            &$-q_3D     $&$0001.0011\ 0110\ 00$\\
+    \cline{2-3}
+            &$sum       $&$1111.0101\ 1010\ 00$&$\ll2$\\
+            &$carry     $&$0001.0100\ 0100\ 00$&$\ll2$\\
+    \hdashline\\
+    Step 4: &$WS_3      $&$1101.0110\ 1000\ 00$&\\
+            &$WC_3      $&$0101.0001\ 0000\ 0\mathbf{1}$&($W_{msbs}=0010.011\ \text{so}\ q_4=2$)\\
+            &$-q_4D     $&$1101.1001\ 0011\ 11$\\
+    \cline{2-3}
+            &$sum       $&$0101.1110\ 1011\ 10$&$\ll2$\\
+            &$carry     $&$1010.0010\ 0000\ 10$&$\ll2$\\
+    \hdashline\\
+    Step 5: &$WS_4      $&$0111.1010\ 1110\ 00$&\\
+            &$WC_4      $&$1000.1000\ 0010\ 0\mathbf{0}$&($W_{msbs}=0000.001\ \text{so}\ q_5=0$)\\
+            &$-q_5D     $&$0000.0000\ 0000\ 00$\\
+    \cline{2-3}
+            &$sum       $&$1111.0010\ 1100\ 00$&$\ll2$\\
+            &$carry     $&$0001.0000\ 0100\ 00$&$\ll2$\\
+    \hdashline\\
+    Terminate&Quotient&$00.11\ 01\ 11\ 10\ (00\ 1)$
+\end{tabular}
+\end{center}
+}
+
+\end{document}
--- a/docs/divsqrt_tex/sqrt2.tex
+++ b/docs/divsqrt_tex/sqrt2.tex
@ -0,0 +1,385 @@
+\documentclass[12pt]{article}
+\usepackage{amssymb, amsmath, amsfonts, amsthm, graphicx, tcolorbox}
+\usepackage{arydshln}
+
+\parskip = .2in
+\parindent  = 0in
+
+\pagestyle{empty}
+
+%==========
+%==========
+
+\begin{document}
+
+X = 0.0111 0011 1001 (1849/4096)
+
+S = 0.1010 1100 0000 (2752/4096)
+{\small
+\begin{center}
+\begin{tabular}{ccccc}
+            &$X             $&$         0000.0111\ 0011\ 1001   $&          &$S_0={\color{blue}0001}.0000\ 0000\ 0000$\\
+            &$WS_0=2(X-1)   $&$         1110.1110\ 0111\ 0010   $&          &$SM_0={\color{blue}0000}.0000\ 0000\ 0000\phantom{M}$\\
+            &$WC_0          $&$         0000.0000\ 0000\ 0000   $&          &$K_0=0001.0000\ 0000\ 0000\ $\\
+            &                &                                   &          &$C_0=1111.0000\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 1: &$WS_0          $&$         1110.1110\ 0111\ 0010   $&          &\\
+            &$WC_0          $&$         0000.0000\ 0000\ 0000   $&          &($W_{msbs}=1110\ \text{so}\ s_1=-1$)\\
+            &$F_1=2S_0-K_1 $&${\color{blue}0001.1}000\ 0000\ 0000$&          &$S_1={\color{blue}0000.1}000\ 0000\ 0000$\\
+            &                &                                   &          &$SM_1={\color{blue}0000.0}000\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.0110\ 0111\ 0010   $&$\ll1    $&$K_1=0000.1000\ 0000\ 0000\ $\\
+            &$carry         $&$         0001.0000\ 0000\ 0000   $&$\ll1    $&$C_1=1111.1000\ 0000\ 0000\,$\\
+    \hdashline\\    
+    Step 2: &$WS_1          $&$         1110.1100\ 1110\ 0100   $&          &\\
+            &$WC_1          $&$         0010.0000\ 0000\ 0000   $&          &($W_{msbs}=0000\ \text{so}\ s_2=1$)\\
+            &$F_2=-2S_1-K_2$&${\color{blue}1110.11}00\ 0000\ 0000$&          &$S_2={\color{blue}0000.11}00\ 0000\ 0000$\\
+            &                &                                   &          &$SM_2={\color{blue}0000.10}00\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         0010.0000\ 1110\ 0100   $&$\ll1    $&$K_2=0000.0100\ 0000\ 0000\ $\\
+            &$carry         $&$         1101.1000\ 0000\ 0000   $&$\ll1    $&$C_2=1111.1100\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 3: &$WS_2          $&$         0100.0001\ 1100\ 1000   $&          &\\
+            &$WC_2          $&$         1011.0000\ 0000\ 0000   $&          &($W_{msbs}=1111\ \text{so}\ s_3=0$)\\
+            &$F_3=0       $&${\color{blue}0000.000}0\ 0000\ 0000$&          &$S_3={\color{blue}0000.110}0\ 0000\ 0000$\\
+            &                &                                   &          &$SM_3={\color{blue}0000.101}0\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.0001\ 1100\ 1000   $&$\ll1    $&$K_3=0000.0010\ 0000\ 0000\ $\\
+            &$carry         $&$         0000.0000\ 0000\ 0000   $&$\ll1    $&$C_3=1111.1110\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 4: &$WS_3          $&$         1110.0011\ 1001\ 0000   $&          &\\
+            &$WC_3          $&$         0000.0000\ 0000\ 0000   $&          &($W_{msbs}=1110\ \text{so}\ s_4=-1$)\\
+            &$F_4=2S_3-K_4 $&${\color{blue}0001.0111}\ 0000\ 0000$&          &$S_4={\color{blue}0000.1011}\ 0000\ 0000$\\
+            &                &                                   &          &$SM_4={\color{blue}0000.1010}\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.0100\ 1001\ 0000   $&$\ll1    $&$K_4=0000.0001\ 0000\ 0000\ $\\
+            &$carry         $&$         0000.0110\ 0000\ 0000   $&$\ll1    $&$C_4=1111.1111\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 5: &$WS_4          $&$         1110.1001\ 0010\ 0000   $&          &\\
+            &$WC_4          $&$         0000.1100\ 0000\ 0000   $&          &($W_{msbs}=1110\ \text{so}\ s_5=-1$)\\
+            &$F_5=2S_4-K_5 $&${\color{blue}0001.0101\ 1}000\ 0000$&          &$S_5={\color{blue}0000.1010\ 1}000\ 0000$\\
+            &                &                                   &          &$SM_5={\color{blue}0000.1010\ 0}000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.0000\ 1010\ 0000   $&$\ll1    $&$K_5=0000.0000\ 1000\ 0000\ $\\
+            &$carry         $&$         0001.1010\ 0000\ 0000   $&$\ll1    $&$C_5=1111.1111\ 1000\ 0000\,$\\
+    \hdashline\\
+    Step 6: &$WS_5          $&$         1110.0001\ 0100\ 0000   $&          &\\
+            &$WC_5          $&$         0011.0100\ 0000\ 0000   $&          &($W_{msbs}=0001\ \text{so}\ s_6=1$)\\
+            &$F_6=-2S_5-K_6$&${\color{blue}1110.1010\ 11}00\ 0000$&          &$S_6={\color{blue}0000.1010\ 11}00\ 0000$\\
+            &                &                                   &          &$SM_6={\color{blue}0000.1010\ 10}00\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         0011.1111\ 1000\ 0000   $&$\ll1    $&$K_6=0000.0000\ 0100\ 0000\ $\\
+            &$carry         $&$         1100.0000\ 1000\ 0000   $&$\ll1    $&$C_6=1111.1111\ 1100\ 0000\,$\\
+            &                &                                   &          &$sum+carry=0$, terminate\\
+    \hdashline\\
+    Terminate&Square Root&0.101011
+\end{tabular}
+\end{center}
+}
+
+{\small
+\begin{center}
+\begin{tabular}{cccc}
+    \hdashline\\
+    Step 6: &$WS    $&$0111.1111\ 0000\ 00$&\\
+            &$WC    $&$1000.0001\ 0000\ 00$&($W_{msbs}=1111\ \text{so}\ s_6=0$)\\
+            &$F     $&$0000.0000\ 0000\ 00$&$S_6=\mathbf{1.0101\ 10}00\ 00$\\
+            &        &                     &$SM_6=\mathbf{1.0101\ 01}00\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$1111.1110\ 0000\ 00$&$\ll1$\\
+            &$WC    $&$0000.0010\ 0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 7: &$WS    $&$1111.1100\ 0000\ 00$&\\
+            &$WC    $&$0000.0100\ 0000\ 00$&($W_{msbs}=1111\ \text{so}\ s_7=0$)\\
+            &$F     $&$0000.0000\ 0000\ 00$&$S_7=\mathbf{1.0101\ 100}0\ 00$\\
+            &        &                     &$SM_7=\mathbf{1.0101\ 011}0\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$1111.1000\ 0000\ 00$&$\ll1$\\
+            &$WC    $&$0000.1000\ 0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 8: &$WS    $&$1111.0000\ 0000\ 00$&\\
+            &$WC    $&$0001.0000\ 0000\ 00$&($W_{msbs}=0000\ \text{so}\ s_8=1$)\\
+            &$F     $&$1110.1010\ 0111\ 10$&$S_8=\mathbf{1.0101\ 1001}\ 00$\\
+            &        &                     &$SM_8=\mathbf{1.0101\ 1000}\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$0000.1010\ 0111\ 10$&$\ll1$\\
+            &$WC    $&$1110.0000\ 0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 9: &$WS    $&$0001.0100\ 1111\ 00$&\\
+            &$WC    $&$1100.0000\ 0000\ 00$&($W_{msbs}=1101\ \text{so}\ s_9=-1$)\\
+            &$F     $&$0001.0101\ 1000\ 11$&$S_9=\mathbf{1.0101\ 1000\ 1}0$\\
+            &        &                     &$SM_9=\mathbf{1.0101\ 1000\ 0}0\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$1100.0001\ 0111\ 11$&$\ll1$\\
+            &$WC    $&$0010.1001\ 0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 10:&$WS    $&$1000.0010\ 1111\ 10$&\\
+            &$WC    $&$0101.0010\ 0000\ 00$&($W_{msbs}=1101\ \text{so}\ s_{10}=-1$)\\
+            &$F     $&$0001.0101\ 1000\ 01$&$S_{10}=\mathbf{1.0101\ 1000\ 01}$\\
+            &        &                     &$SM_{10}=\mathbf{1.0101\ 1000\ 00}\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$1100.0001\ 0111\ 11$&$\ll1$\\
+            &$WC    $&$0010.0101\ 0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 11:&$WS    $&$1000.0010\ 1111\ 10$&\\
+            &$WC    $&$0100.1010\ 0000\ 00$&($W_{msbs}=1101\ \text{so}\ s_{11}=-1$)\\
+            &$F     $&$0001.0101\ 1000\ 00$&$S_{11}=\mathbf{1.0101\ 1000\ 00}$\\
+            &        &                     &$SM_{11}=\mathbf{1.0101\ 1000\ 00}\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$1101.1101\ 0111\ 10$&$\ll1$\\
+            &$WC    $&$0000.0101\ 0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 12:&$WS    $&$1101.1000\ 0111\ 10$&\\
+            &$WC    $&$0000.1010\ 0000\ 00$&($W_{msbs}=1101\ \text{so}\ s_{12}=-1$)\\
+            &$F     $&$0001.0101\ 1000\ 00$&$S_{12}=\mathbf{1.0101\ 1000\ 00}$\\
+            &        &                     &$SM_{12}=\mathbf{1.0101\ 1000\ 00}\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$1100.0111\ 1111\ 10$&$\ll1$\\
+            &$WC    $&$0011.0000\ 0000\ 00$&$\ll1$\\
+\end{tabular}
+\end{center}
+}
+
+{\small
+\begin{center}
+\begin{tabular}{cccc}
+    Step 13:&$WS    $&$1000.1111\ 1111\ 00$&\\
+            &$WC    $&$0110.0000\ 0000\ 00$&($W_{msbs}=1110\ \text{so}\ s_{13}=-1$)\\
+            &$F     $&$0001.0101\ 1000\ 00$&$S_{13}=\mathbf{1.0101\ 1000\ 00}$\\
+            &        &                     &$SM_{13}=\mathbf{1.0101\ 1000\ 00}\phantom{M}$\\
+    \cline{2-3}
+            &$WS    $&$1111.1010\ 0111\ 10$&$\ll1$\\
+            &$WC    $&$0000.1011\ 0000\ 00$&$\ll1$\\
+\end{tabular}
+\end{center}
+}
+
+\vfill
+\eject
+
+{\small
+\begin{center}
+\begin{tabular}{ccccc}
+            &$X             $&$         0000.1100\ 0000     $&          &$S_0={\color{blue}0001}.0000\ 0000\ 00$\\
+            &$WS=X-1        $&$         1111.1100\ 0000     $&          &$SM_0={\color{blue}0000}.0000\ 0000\ 00\phantom{M}$\\
+            &$WC            $&$         0000.0000\ 0000     $&          &$K_0=0000.0100\ 0000\ 00\ $\\
+            &                &                               &          &$C_0=1111.1100\ 0000\ 00\,$\\
+    \hdashline\\
+    Step 1: &$WS            $&$         1111.1100\ 0000     $&          &\\
+            &$WC            $&$         0000.0000\ 0000     $&          &($W_{msbs}=1111\ \text{so}\ s_1=0$)\\
+            &$F_1=0         $&${\color{blue}0000.00}00\ 0000$&          &$S_1={\color{blue}0001.0}000\ 0000\ 00$\\
+            &                &                               &          &$SM_1={\color{blue}0000.1}000\ 0000\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS            $&$         1111.1100\ 0000     $&$\ll1    $&$K_1=0000.0010\ 0000\ 00\ $\\
+            &$WC            $&$         0000.0000\ 0000     $&$\ll1    $&$C_1=1111.1110\ 0000\ 00\,$\\
+    \hdashline\\    
+    Step 2: &$WS            $&$         1111.1000\ 0000     $&          &\\
+            &$WC            $&$         0000.0000\ 0000     $&          &($W_{msbs}=1111\ \text{so}\ s_2=0$)\\
+            &$F_2=0         $&${\color{blue}0000.000}0\ 0000$&          &$S_2={\color{blue}0001.00}00\ 0000\ 00$\\
+            &                &                               &          &$SM_2={\color{blue}0000.11}00\ 0000\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS            $&$         1111.1000\ 0000     $&$\ll1    $&$K_2=0000.0001\ 0000\ 00\ $\\
+            &$WC            $&$         0000.0000\ 0000     $&$\ll1    $&$C_2=1111.1111\ 0000\ 00\,$\\
+    \hdashline\\
+    Step 3: &$WS            $&$         1111.0000\ 0000     $&          &\\
+            &$WC            $&$         0000.0000\ 0000     $&          &($W_{msbs}=1111\ \text{so}\ s_3=0$)\\
+            &$F_3=0         $&${\color{blue}0000.0000}\ 0000$&          &$S_3={\color{blue}0001.000}0\ 0000\ 00$\\
+            &                &                               &          &$SM_3={\color{blue}0000.111}0\ 0000\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS            $&$1111.0000\ 0000              $&$\ll1    $&$K_3=0000.0000\ 1000\ 00\ $\\
+            &$WC            $&$0000.0000\ 0000              $&$\ll1    $&$C_3=1111.1111\ 1000\ 00\,$\\
+    \hdashline\\
+    Step 4: &$WS            $&$1110.0000\ 0000              $&          &\\
+            &$WC            $&$0000.0000\ 0000              $&          &($W_{msbs}=1110\ \text{so}\ s_4=-1$)\\
+            &$F_4=S_3-K_3   $&${\color{blue}0000.1111\ 1}000$&          &$S_4={\color{blue}0000.1111}\ 0000\ 00$\\
+            &                &                               &          &$SM_4={\color{blue}0000.1110}\ 0000\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS            $&$1110.1111\ 1000              $&$\ll1    $&$K_4=0000.0000\ 0100\ 00\ $\\
+            &$WC            $&$0000.0000\ 0000              $&$\ll1    $&$C_4=1111.1111\ 1100\ 00\,$\\
+    \hdashline\\
+    Step 5: &$WS            $&$1101.1111\ 0000              $&          &\\
+            &$WC            $&$0000.0000\ 0000              $&          &($W_{msbs}=1101\ \text{so}\ s_5=-1$)\\
+            &$F_5=S_4-K_4   $&${\color{blue}0000.1110\ 11}00$&          &$S_5={\color{blue}0000.1110\ 1}000\ 00$\\
+            &                &                                   &          &$SM_5={\color{blue}0000.1110\ 0}000\ 00\phantom{M}$\\
+    \cline{2-3}
+            &$WS            $&$1101.0001\ 1100              $&$\ll1    $&$K_5=0000.0000\ 0010\ 00\ $\\
+            &$WC            $&$0001.1100\ 0000              $&$\ll1    $&$C_5=1111.1111\ 1110\ 00\,$\\
+    Terminate
+\end{tabular}
+\end{center}
+}
+
+\vfill
+\eject
+
+X = 0.1010101101 (685/1024)
+
+S = 0.1101000110 (838/1024)
+
+once R4 sslc gets here i can fill this in
+
+\vfill 
+\eject
+
+X = 1.1001 (25/16)
+
+S = 1.0100 (20/16)
+
+{\small
+\begin{center}
+\begin{tabular}{cccc}
+    Attempt 1:& $X$ is normalized& to $1/2<X<2$&$W_{msbs}$ looks at Q4.0\\
+    &&&\\
+            &$X     $&$0001.1001$&\\
+            &$WS=X-1$&$0000.1001$& $s_0=1$\\
+            &$WC    $&$0000.0000$&$S_0=\mathbf{1}.0000,\quad SM_0=\mathbf{0}.0000$ \\
+    \hdashline\\
+    Step 1: &$WS    $&$0000.1001$&\\
+            &$WC    $&$0000.0000$&($W_{msbs}=0000\ \text{so}\ s_1=1$)\\
+            &$F     $&$1110.1100$&$S_1=\mathbf{1.1}000,\quad SM_1=\mathbf{1.0}000$\\
+    \cline{2-3}
+            &$WS    $&$1110.0101$&$\ll1$\\
+            &$WC    $&$0001.0000$&$\ll1$\\
+    \hdashline\\
+    Step 2: &$WS    $&$1100.1010$&\\
+            &$WC    $&$0010.0000$&($W_{msbs}=1110\ \text{so}\ s_2=-1$)\\
+            &$F     $&$0001.0110$&$S_2=\mathbf{1.01}00,\quad SM_2=\mathbf{1.00}00$\\
+    \cline{2-3}
+            &$WS    $&$1111.1110$&$\ll1$\\
+            &$WC    $&$0000.0010$&$\ll1$\\
+    \hdashline\\
+    Step 3: &$WS    $&$1111.1100$&\\
+            &$WC    $&$0000.0100$&($W_{msbs}=1111\ \text{so}\ s_3=0$)\\
+            &$F     $&$0000.0000$&$S_3=\mathbf{1.010}0,\quad SM_3=\mathbf{1.001}0$\\
+    \cline{2-3}
+            &$WS    $&$1111.1000$&$\ll1$\\
+            &$WC    $&$0000.1000$&$\ll1$\\
+    \hdashline\\
+    Step 4: &$WS    $&$1111.0000$&\\
+            &$WC    $&$0001.0000$&($W_{msbs}=0000\ \text{so}\ s_4=1$)\\
+            &$F     $&$0000.0000$&$S_4=\mathbf{1.0101},\quad SM_4=\mathbf{1.0100}$\\
+    Terminate&&&
+\end{tabular}
+\end{center}
+}
+
+
+{\small
+\begin{center}
+\begin{tabular}{cccc}
+    Attempt 2:& $X$ is normalized& to $1/2<X<2$&$W_{msbs}$ looks at Q3.1\\
+    &&&\\
+            &$X     $&$001.1001$&\\
+            &$WS=X-1$&$000.1001$& $s_0=1$\\
+            &$WC    $&$000.0000$&$S_0=\mathbf{1}.0000,\quad SM_0=\mathbf{0}.0000$ \\
+    \hdashline\\
+    Step 1: &$WS    $&$000.1001$&\\
+            &$WC    $&$000.0000$&($W_{msbs}=000.1\ \text{so}\ s_1=1$)\\
+            &$F     $&$110.1000$&$S_1=\mathbf{1.1}000,\quad SM_1=\mathbf{1.0}000$\\
+    \cline{2-3}
+            &$WS    $&$110.0001$&$\ll1$\\
+            &$WC    $&$001.0000$&$\ll1$\\
+    \hdashline\\
+    Step 2: &$WS    $&$100.0010$&\\
+            &$WC    $&$010.0000$&($W_{msbs}=110.0\ \text{so}\ s_2=-1$)\\
+            &$F     $&$001.0100$&$S_2=\mathbf{1.01}00,\quad SM_2=\mathbf{1.00}00$\\
+    \cline{2-3}
+            &$WS    $&$111.0110$&$\ll1$\\
+            &$WC    $&$000.0000$&$\ll1$\\
+    \hdashline\\
+    Step 3: &$WS    $&$110.1100$&\\
+            &$WC    $&$000.0000$&($W_{msbs}=110.1\ \text{so}\ s_3=-1$)\\
+            &$F     $&$001.0010$&$S_3=\mathbf{1.001}0,\quad SM_3=\mathbf{1.000}0$\\
+    \cline{2-3}
+            &$WS    $&$111.1110$&$\ll1$\\
+            &$WC    $&$000.0000$&$\ll1$\\
+    \hdashline\\
+    Step 4: &$WS    $&$111.1110$&\\
+            &$WC    $&$000.0000$&($W_{msbs}=111.1\ \text{so}\ s_4=0$)\\
+            &$F     $&$000.0000$&$S_4=\mathbf{1.0010},\quad SM_4=\mathbf{1.0001}$\\
+    Terminate&&&
+\end{tabular}
+\end{center}
+}
+
+{\small
+\begin{center}
+\begin{tabular}{cccc}
+    Attempt 3:& $X$ is normalized& to $1<X<4$&$W_{msbs}$ looks at Q4.0\\
+    &&&\\
+            &$X     $&$0001.1001$&\\
+            &$WS=X-2$&$1111.1001$& $s_{-1}=10$\\
+            &$WC    $&$0000.0000$&$S_{-1}=\mathbf{1}0.0000,\quad SM_{-1}=\mathbf{0}0.0000$ \\
+    \hdashline\\
+    Step 0: &$WS    $&$1111.1001$&\\
+            &$WC    $&$0000.0000$&($W_{msbs}=1111\ \text{so}\ s_0=0$)\\
+            &$F     $&$0000.0000$&$S_0=\mathbf{10}.0000,\quad SM_0=\mathbf{01}.0000$\\
+    \cline{2-3}
+            &$WS    $&$1111.1001$&$\ll1$\\
+            &$WC    $&$0000.0000$&$\ll1$\\
+    \hdashline\\
+    Step 1: &$WS    $&$1111.0010$&\\
+            &$WC    $&$0000.0000$&($W_{msbs}=1111\ \text{so}\ s_1=0$)\\
+            &$F     $&$0000.0000$&$S_1=\mathbf{10.0}000,\quad SM_1=\mathbf{01.1}000$\\
+    \cline{2-3}
+            &$WS    $&$1111.0010$&$\ll1$\\
+            &$WC    $&$0000.0000$&$\ll1$\\
+    \hdashline\\
+    Step 2: &$WS    $&$1110.0100$&\\
+            &$WC    $&$0000.0000$&($W_{msbs}=1110\ \text{so}\ s_2=-1$)\\
+            &$F     $&$0001.1100$&$S_2=\mathbf{01.01}00,\quad SM_2=\mathbf{01.00}00$\\
+    \cline{2-3}
+            &$WS    $&$1111.1000$&$\ll1$\\
+            &$WC    $&$0000.1000$&$\ll1$\\
+    Terminate&&&
+\end{tabular}
+\end{center}
+}
+
+\vfill
+\eject
+
+X = 0.011001 (25/64)
+
+S = 0.101000 (40/64)
+
+{\small
+\begin{center}
+\begin{tabular}{cccc}
+    Attempt 4:& $X$ is normalized& to $1/4<X<1$&$W_{msbs}$ looks at Q3.1\\
+    &&&\\
+            &$X     $&$000.0110\ 01$&\\
+            &$WS=X-1$&$111.0110\ 01$& $s_0=1$\\
+            &$WC    $&$000.0000\ 00$&$S_0=\mathbf{1}.000000,\quad SM_0=\mathbf{0}.000000$ \\
+    \hdashline\\
+    Step 1: &$WS    $&$111.0110\ 01$&\\
+            &$WC    $&$000.0000\ 00$&($W_{msbs}=111.0\ \text{so}\ s_1=-1$)\\
+            &$F     $&$000.1000\ 00$&$S_1=\mathbf{0.1}00000,\quad SM_1=\mathbf{0.0}00000$\\
+    \cline{2-3}
+            &$WS    $&$111.1110\ 01$&$\ll1$\\
+            &$WC    $&$000.0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 2: &$WS    $&$111.1100\ 10$&\\
+            &$WC    $&$000.0000\ 00$&($W_{msbs}=111.1\ \text{so}\ s_2=0$)\\
+            &$F     $&$000.0000\ 00$&$S_2=\mathbf{0.10}0000,\quad SM_2=\mathbf{0.01}0000$\\
+    \cline{2-3}
+            &$WS    $&$111.1100\ 10$&$\ll1$\\
+            &$WC    $&$000.0000\ 00$&$\ll1$\\
+    \hdashline\\
+    Step 3: &$WS    $&$111.1001\ 00$&\\
+            &$WC    $&$000.0000\ 00$&($W_{msbs}=010.0\ \text{so}\ s_3=1$)\\
+            &$F     $&$111.0010\ 00$&$S_3=\mathbf{0.111}000,\quad SM_3=\mathbf{0.110}000$\\
+    \cline{2-3}
+            &$WS    $&$1111.1110$&$\ll1$\\
+            &$WC    $&$0000.0000$&$\ll1$\\
+    \hdashline\\
+    Step 4: &$WS    $&$1111.1110$&\\
+            &$WC    $&$0000.0000$&($W_{msbs}=111.1\ \text{so}\ s_4=0$)\\
+            &$F     $&$0000.0000$&$S_4=\mathbf{1.0010}00,\quad SM_4=\mathbf{1.0001}00$\\
+    Terminate&&&
+\end{tabular}
+\end{center}
+}
+
+
+\end{document}
--- a/docs/divsqrt_tex/sqrt4.tex
+++ b/docs/divsqrt_tex/sqrt4.tex
@ -0,0 +1,147 @@
+\documentclass[12pt]{article}
+
+\usepackage{amssymb, amsmath, amsfonts, amsthm, graphicx, tcolorbox}
+
+\usepackage{arydshln}
+
+\parskip = .2in
+\parindent  = 0in
+
+\pagestyle{empty}
+
+%==========
+%==========
+
+\begin{document}
+
+
+
+X = 0.1011 0111 10 (734/1024)
+
+S = 0.1101 1000 11\ 00 (3468/4096), negative sticky bit
+{\small
+\begin{center}
+\begin{tabular}{ccccc}
+            &$X             $&$         0000.1011\ 0111\ 1000   $&          &$S_0={\color{blue}0001}.0000\ 0000\ 0000$\\
+            &$WS_0=4(X-1)   $&$         1110.1101\ 1110\ 0000   $&          &$SM_0={\color{blue}0000}.0000\ 0000\ 0000\phantom{M}$\\
+            &$WC_0          $&$         0000.0000\ 0000\ 0000   $&          &$K_0=0001.0000\ 0000\ 0000\ $\\
+            &                &                                   &          &$C_0=1111.0000\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 1: &$WS_0          $&$         1110.1101\ 1110\ 0000   $&          &\\
+            &$WC_0          $&$         0000.0000\ 0000\ 0000   $&          &($W_{msbs}=1110.110\ \text{so}\ s_1=-1$)\\
+            &$F_1=2S_0-K_1$&${\color{blue}0001.11}00\ 0000\ 0000$&          &$S_1={\color{blue}0000.11}00\ 0000\ 0000$\\
+            &                &                                   &          &$SM_1={\color{blue}0000.10}00\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.0001\ 1110\ 0000   $&$\ll2    $&$K_1=0000.0100\ 0000\ 0000\ $\\
+            &$carry         $&$         0001.1000\ 0000\ 0000   $&$\ll2    $&$C_1=1111.1100\ 0000\ 0000\,$\\
+    \hdashline\\    
+    Step 2: &$WS_1          $&$         1100.0111\ 1000\ 0000   $&          &\\
+            &$WC_1          $&$         0110.0000\ 0000\ 0000   $&          &($W_{msbs}=0010.011\ \text{so}\ s_2=2$)\\
+            &$F_2=-4S_1-4K_2$&${\color{blue}1100.1100}\ 0000\ 0000$&        &$S_2={\color{blue}0000.1110}\ 0000\ 0000$\\
+            &                &                                   &          &$SM_2={\color{blue}0000.1101}\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         0110.1011\ 1000\ 0000   $&$\ll2    $&$K_2=0000.0001\ 0000\ 0000\ $\\
+            &$carry         $&$         1000.1000\ 0000\ 0000   $&$\ll2    $&$C_2=1111.1111\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 3: &$WS_2          $&$         1010.1110\ 0000\ 0000   $&          &\\
+            &$WC_2          $&$         0010.0000\ 0000\ 0000   $&          &($W_{msbs}=1100.111\ \text{so}\ s_3=-2$)\\
+            &$F_3=4S_2-4K_3$&${\color{blue}0011.0111\ 00}00\ 0000$&         &$S_3={\color{blue}0000.1101\ 10}00\ 0000$\\
+            &                &                                   &          &$SM_3={\color{blue}0000.1101\ 01}00\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1011.1001\ 0000\ 0000   $&$\ll2    $&$K_3=0000.0000\ 0100\ 0000\ $\\
+            &$carry         $&$         0100.1100\ 0000\ 0000   $&$\ll2    $&$C_3=1111.1111\ 1100\ 0000\,$\\
+    \hdashline\\
+    Step 4: &$WS_3          $&$         1110.0100\ 0000\ 0000   $&          &\\
+            &$WC_3          $&$         0011.0000\ 0000\ 0000   $&          &($W_{msbs}=0001.010\ \text{so}\ s_4=1$)\\
+            &$F_4=-2S_3-K_4$&${\color{blue}1110.0100\ 1111}\ 0000$&        &$S_4={\color{blue}0000.1101\ 1001}\ 0000$\\
+            &                &                                   &          &$SM_4={\color{blue}0000.1101\ 1000}\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         0011.0000\ 1111\ 0000   $&$\ll2    $&$K_4=0000.0000\ 0001\ 0000\ $\\
+            &$carry         $&$         1100.1000\ 0000\ 0000   $&$\ll2    $&$C_4=1111.1111\ 1111\ 0000\,$\\
+    \hdashline\\
+    Step 5: &$WS_4          $&$         1100.0011\ 1100\ 0000   $&          &\\
+            &$WC_4          $&$         0010.0000\ 0000\ 0000   $&          &($W_{msbs}=1110.001\ \text{so}\ s_5=-1$)\\
+            &$F_5=2S_4-K_5 $&${\color{blue}0001.1011\ 0001\ 11}00$&        &$S_5={\color{blue}0000.1101\ 1000\ 11}00$\\
+            &                &                                   &          &$SM_5={\color{blue}0000.1101\ 1000\ 10}00\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.1000\ 1101\ 1100   $&$\ll2    $&$K_5=0000.0000\ 0000\ 0100\ $\\
+            &$carry         $&$         0000.0110\ 0000\ 0000   $&$\ll2    $&$C_5=1111.1111\ 1111\ 1100\,$\\
+    \hdashline\\
+    Step 6: &$WS_5          $&$         1110.0011\ 0111\ 0000   $&          &\\
+            &$WC_5          $&$         0001.1000\ 0000\ 0000   $&          &($W_{msbs}=1111.101\ \text{so}\ s_6=0$)\\
+            &$F_6=0        $&${\color{blue}0000.0000\ 0000\ 0000}$&         &$S_6={\color{blue}0000.1101\ 1000\ 1100}$\\
+            &                &                                   &          &$SM_6={\color{blue}0000.1101\ 1000\ 1011}\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.1011\ 0111\ 0111   $&$\ll2    $&$K_6=0000.0000\ 0000\ 0001\ $\\
+            &$carry         $&$         0000.0000\ 0000\ 0000   $&$\ll2    $&$C_6=1111.1111\ 1111\ 1111\,$\\
+    \hdashline\\
+    Terminate&Square Root&0000.11 01\ 10 00\ 10 (11)
+\end{tabular}
+\end{center}
+}
+
+X = 0.1001\ 0101\ 00 (596/1024)
+
+S = 0.1100 0011 01\ 01 (3125/4096)
+{\small
+\begin{center}
+\begin{tabular}{ccccc}
+            &$X             $&$         0000.1001\ 0101\ 0000   $&          &$S_0={\color{blue}0001}.0000\ 0000\ 0000$\\
+            &$WS_0=4(X-1)   $&$         1110.0101\ 0100\ 0000   $&          &$SM_0={\color{blue}0000}.0000\ 0000\ 0000\phantom{M}$\\
+            &$WC_0          $&$         0000.0000\ 0000\ 0000   $&          &$K_0=0001.0000\ 0000\ 0000\ $\\
+            &                &                                   &          &$C_0=1111.0000\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 1: &$WS_0          $&$         1110.0101\ 0100\ 0000   $&          &\\
+            &$WC_0          $&$         0000.0000\ 0000\ 0000   $&          &($W_{msbs}=1110.010\ \text{so}\ s_1=-1$)\\
+            &$F_1=2S_0-K_1  $&${\color{blue}0001.11}00\ 0000\ 0000$&        &$S_1={\color{blue}0000.11}00\ 0000\ 0000$\\
+            &                &                                   &          &$SM_1={\color{blue}0000.10}00\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.1001\ 0100\ 0000   $&$\ll2    $&$K_1=0000.0100\ 0000\ 0000\ $\\
+            &$carry         $&$         0000.1000\ 0000\ 0000   $&$\ll2    $&$C_1=1111.1100\ 0000\ 0000\,$\\
+    \hdashline\\    
+    Step 2: &$WS_1          $&$         1110.0101\ 0000\ 0000   $&          &\\
+            &$WC_1          $&$         0010.0000\ 0000\ 0000   $&          &($W_{msbs}=0000.010\ \text{so}\ s_2=0$)\\
+            &$F_2=0       $&${\color{blue}0000.0000}\ 0000\ 0000$&          &$S_2={\color{blue}0000.1100}\ 0000\ 0000$\\
+            &                &                                   &          &$SM_2={\color{blue}0000.1011}\ 0000\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1100.0101\ 0000\ 0000   $&$\ll2    $&$K_2=0000.0001\ 0000\ 0000\ $\\
+            &$carry         $&$         0100.0000\ 0000\ 0000   $&$\ll2    $&$C_2=1111.1111\ 0000\ 0000\,$\\
+    \hdashline\\
+    Step 3: &$WS_2          $&$         0001.0100\ 0000\ 0000   $&          &\\
+            &$WC_2          $&$         0000.0000\ 0000\ 0000   $&          &($W_{msbs}=0001.010\ \text{so}\ s_3=1$)\\
+            &$F_3=-2S_2-K_3$&${\color{blue}1110.0111\ 11}00\ 0000$&         &$S_3={\color{blue}0000.1100\ 01}00\ 0000$\\
+            &                &                                   &          &$SM_3={\color{blue}0000.1100\ 00}00\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.0011\ 1100\ 0000   $&$\ll2    $&$K_3=0000.0000\ 0100\ 0000\ $\\
+            &$carry         $&$         1000.1000\ 0000\ 0000   $&$\ll2    $&$C_3=1111.1111\ 1100\ 0000\,$\\
+    \hdashline\\
+    Step 4: &$WS_3          $&$         1100.1111\ 0000\ 0000   $&          &\\
+            &$WC_3          $&$         0010.0000\ 0000\ 0000   $&          &($W_{msbs}=1110.111\ \text{so}\ s_4=-1$)\\
+            &$F_4=2S_3-K_4  $&${\color{blue}0001.1000\ 0111}\ 0000$&        &$S_4={\color{blue}0000.1100\ 0011}\ 0000$\\
+            &                &                                   &          &$SM_4={\color{blue}0000.1100\ 0010}\ 0000\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         1111.0111\ 0111\ 0000   $&$\ll2    $&$K_4=0000.0000\ 0001\ 0000\ $\\
+            &$carry         $&$         0001.0000\ 0000\ 0000   $&$\ll2    $&$C_4=1111.1111\ 1111\ 0000\,$\\
+    \hdashline\\
+    Step 5: &$WS_4          $&$         1101.1101\ 1100\ 0000   $&          &\\
+            &$WC_4          $&$         0100.0000\ 0000\ 0000   $&          &($W_{msbs}=0001.110\ \text{so}\ s_5=1$)\\
+            &$F_5=-2S_4-K_5 $&${\color{blue}1110.0111\ 1001\ 11}00$&        &$S_5={\color{blue}0000.1100\ 0011\ 01}00$\\
+            &                &                                   &          &$SM_5={\color{blue}0000.1100\ 0011\ 00}00\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         0111.1010\ 0101\ 1100   $&$\ll2    $&$K_5=0000.0000\ 0000\ 0100\ $\\
+            &$carry         $&$         1000.1011\ 0000\ 0000   $&$\ll2    $&$C_5=1111.1111\ 1111\ 1100\,$\\
+    \hdashline\\
+    Step 6: &$WS_5          $&$         1110.1001\ 0111\ 0000   $&          &\\
+            &$WC_5          $&$         0010.1100\ 0000\ 0000   $&          &($W_{msbs}=0001.010\ \text{so}\ s_6=1$)\\
+            &$F_6=          $&$                                 $&          &$S_6={\color{blue}0000.1100\ 0011\ 0101}$\\
+            &                &                                   &          &$SM_6={\color{blue}0000.1100\ 0011\ 0100}\phantom{M}$\\
+    \cline{2-3}
+            &$sum           $&$         0001.1110\ 1001\ 0111   $&$\ll2    $&$K_6=0000.0000\ 0000\ 0001\ $\\
+            &$carry         $&$         1110.0010\ 1100\ 0000   $&$\ll2    $&$C_6=1111.1111\ 1111\ 1111\,$\\
+    \hdashline\\
+    Terminate
+\end{tabular}
+\end{center}
+}
+
+\end{document}
--- a/examples/fp/softfloat_demo/Makefile
+++ b/examples/fp/softfloat_demo/Makefile
@ -2,14 +2,13 @@

 CC     = gcc
 CFLAGS = -O3
-LIBS   = -lm
 LFLAGS = -L. 
 # Link against the riscv-isa-sim version of SoftFloat rather than 
 # the regular version to get RISC-V NaN behavior
-IFLAGS   = -I$(RISCV)/riscv-isa-sim/softfloat
-LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a
-#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
-#LIBS   = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
+#IFLAGS   = -I$(RISCV)/riscv-isa-sim/softfloat
+#LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a
+IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
+LIBS   = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a -lm -lquadmath
 SRCS   = $(wildcard *.c)

 PROGS = $(patsubst %.c,%,$(SRCS))
@ -17,7 +16,7 @@ PROGS = $(patsubst %.c,%,$(SRCS))
 all:	$(PROGS)

 %: %.c
-	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
+	$(CC) $(CFLAGS) -DSOFTFLOAT_FAST_INT64 $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)

 clean: 
 	rm -f $(PROGS)
--- a/examples/fp/softfloat_demo/softfloat_demo2.c
+++ b/examples/fp/softfloat_demo/softfloat_demo2.c
@ -1,77 +0,0 @@
-//
-// softfloat_div.c
-// james.stine@okstate.edu 12 April 2023
-// 
-// Demonstrate using SoftFloat to compute 754 fp divide, then print results
-// (adapted from original C built by David Harris)
-//
-
-#include <stdio.h>
-#include <stdint.h>
-#include "softfloat.h"
-#include "softfloat_types.h"
-typedef union sp {
-  uint32_t v;
-  unsigned short x[2];
-  float f;
-} sp;
-
-void printF32 (char *msg, float32_t f) {
-  sp conv;
-  int i, j;
-  conv.v = f.v; // use union to convert between hexadecimal and floating-point views
-  printf("%s: ", msg);  // print out nicely
-  printf("0x%04x_%04x = %1.15g\n", (conv.v >> 16),(conv.v & 0xFFFF), conv.f);
-}
-
-void printFlags(void) {
-  int NX = softfloat_exceptionFlags % 2;
-  int UF = (softfloat_exceptionFlags >> 1) % 2;
-  int OF = (softfloat_exceptionFlags >> 2) % 2;
-  int DZ = (softfloat_exceptionFlags >> 3) % 2;
-  int NV = (softfloat_exceptionFlags >> 4) % 2;
-  printf ("Flags: Inexact %d Underflow %d Overflow %d DivideZero %d Invalid %d\n", 
-          NX, UF, OF, DZ, NV);
-}
-
-void softfloatInit(void) {
-    // RNE: softfloat_round_near_even
-    // RZ:  softfloat_round_minMag
-    // RU:  softfloat_round_max
-    // RD:  softfloat_round_min
-    // RM: softfloat_round_near_maxMag   
-    softfloat_roundingMode = softfloat_round_near_even; 
-    softfloat_exceptionFlags = 0; // clear exceptions
-    softfloat_detectTininess = softfloat_tininess_afterRounding; // RISC-V behavior for tininess
-}
-
-int main() {
-
-  // float32_t is typedef in SoftFloat
-  float32_t x, y, r1, r2;
-  sp convx, convy;
-
-  // Choose two random values
-  convx.f = 1.30308703073;
-  convy.f = 1.903038030370;
-  // Convert to SoftFloat format
-  x.v = (convx.x[1] << 16) + convx.x[0];
-  y.v = (convy.x[1] << 16) + convy.x[0];  
-
-  printf("Example using SoftFloat\n");
-  
-  softfloatInit();
-  r1 = f32_div(x, y);
-  printf("-------\n");
-  printF32("X", x);
-  printF32("Y", y); 
-  printF32("result = X/Y", r1);
-  printFlags();
-
-  r2 = f32_sqrt(x);
-  printf("-------\n");    
-  printF32("X", x);
-  printF32("result = sqrt(X)", r2);
-  printFlags();  
-
-}
--- a/examples/fp/softfloat_demo/softfloat_demoDP.c
+++ b/examples/fp/softfloat_demo/softfloat_demoDP.c
@ -0,0 +1,88 @@
+// softfloat_demoDP.c
+// james.stine@okstate.edu 15 August 2023
+// 
+// Demonstrate using SoftFloat to compute a double-precision multiply-add (f64_mulAdd), then print results
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <quadmath.h> // GCC Quad-Math Library
+#include "softfloat.h"
+#include "softfloat_types.h"
+// sp: overlays a raw 32-bit word (v) with a native float (f) so a value can
+// be viewed either as its hexadecimal encoding or as a floating-point number.
+typedef union sp {
+  uint32_t v;
+  float f;
+} sp;
+
+// dp: same idea for 64 bits -- raw word (v) overlaid with a native double (d).
+typedef union dp {
+  uint64_t v;
+  double d;
+} dp;
+
+// qp: two 64-bit words (v[0], v[1]) overlaid with a GCC __float128 (q).
+typedef union qp {
+  uint64_t v[2];
+  __float128 q;
+} qp;
+
+
+// Print a SoftFloat single-precision value as "<msg>: 0xHHHH_LLLL = <value>".
+//   msg: label printed before the value
+//   f:   SoftFloat single-precision value to display
+void printF32 (char *msg, float32_t f) {
+  sp conv;
+  conv.v = f.v; // use union to convert between hexadecimal and floating-point views
+  uint32_t hi = conv.v >> 16;     // upper 16 bits of the encoding
+  uint32_t lo = conv.v & 0xFFFF;  // lower 16 bits of the encoding
+  printf("%s: ", msg);  // print out nicely
+  // PRIx32 keeps the conversion matched to uint32_t on every platform
+  printf("0x%04" PRIx32 "_%04" PRIx32 " = %g\n", hi, lo, conv.f);
+}
+
+// Print a SoftFloat double-precision value as
+// "<msg>: 0xHHHHHHHH_LLLLLLLL = <value>".
+//   msg: label printed before the value
+//   d:   SoftFloat double-precision value to display
+void printF64 (char *msg, float64_t d) {
+  dp conv;
+  conv.v = d.v; // use union to convert between hexadecimal and floating-point views
+  // Narrow each half to 32 bits before printing: handing a uint64_t to a
+  // %x conversion (which expects unsigned int) is undefined behavior.
+  uint32_t hi = (uint32_t)(conv.v >> 32);
+  uint32_t lo = (uint32_t)(conv.v & 0xFFFFFFFF);
+  printf("%s: ", msg);  // print out nicely
+  printf("0x%08" PRIx32 "_%08" PRIx32 " = %g\n", hi, lo, conv.d);
+}
+
+// Print a SoftFloat quad-precision value as
+// "<msg>: 0x<v[1] in hex>_<v[0] in hex> = <value>".
+//   msg: label printed before the value
+//   q:   SoftFloat quad-precision value to display
+void printF128 (char *msg, float128_t q) {
+  qp conv;
+  char text[64]; // ample room for one %1.15Qe rendering
+  conv.v[0] = q.v[0]; // use union to convert between hexadecimal and floating-point views
+  conv.v[1] = q.v[1];
+  // printf itself does not understand the Q length modifier; libquadmath's
+  // quadmath_snprintf is the routine that formats a __float128.
+  quadmath_snprintf(text, sizeof text, "%1.15Qe", conv.q);
+  printf("%s: ", msg);  // print out nicely
+  printf("0x%016" PRIx64 "_%016" PRIx64 " = %s\n", conv.v[1], conv.v[0], text);
+}
+
+// Report the five exception flags held in softfloat_exceptionFlags.
+// Bit 0 is reported as Inexact, bit 1 Underflow, bit 2 Overflow,
+// bit 3 DivideZero and bit 4 Invalid.
+void printFlags(void) {
+  int inexact    = softfloat_exceptionFlags % 2;
+  int underflow  = (softfloat_exceptionFlags >> 1) % 2;
+  int overflow   = (softfloat_exceptionFlags >> 2) % 2;
+  int divByZero  = (softfloat_exceptionFlags >> 3) % 2;
+  int invalid    = (softfloat_exceptionFlags >> 4) % 2;
+
+  printf ("Flags: Inexact %d Underflow %d Overflow %d DivideZero %d Invalid %d\n", 
+          inexact, underflow, overflow, divByZero, invalid);
+}
+
+// Put SoftFloat's global state into the configuration used by this demo.
+void softfloatInit(void) {
+    // start with no exceptions pending
+    softfloat_exceptionFlags = 0;
+
+    // RISC-V behavior for tininess
+    softfloat_detectTininess = softfloat_tininess_afterRounding;
+
+    // Available rounding modes:
+    //   RNE  softfloat_round_near_even   (selected below)
+    //   RZ   softfloat_round_minMag
+    //   RP   softfloat_round_max
+    //   RM   softfloat_round_min
+    softfloat_roundingMode = softfloat_round_near_even;
+}
+
+// Demo driver: print three double-precision operands, then the result of
+// f64_mulAdd applied to them.
+int main() {
+  float64_t x, y, z;  // operands, set below from raw 64-bit encodings
+  float64_t r;        // value returned by f64_mulAdd
+
+  x.v = 0xBFFF988ECE97DFEB;
+  y.v = 0x3F8EFFFFFFFFFFFF;
+  z.v = 0x4001000000000000;
+
+  softfloatInit();
+
+  // show the inputs first
+  printF64("X", x);
+  printF64("Y", y);
+  printF64("Z", z);
+
+  r = f64_mulAdd(x, y, z);
+
+  // blank line, then the result
+  printf("\n");
+  printF64("r", r);
+}
--- a/examples/fp/softfloat_demo/softfloat_demoQP.c
+++ b/examples/fp/softfloat_demo/softfloat_demoQP.c
@ -0,0 +1,91 @@
+// softfloat_demoQP.c
+// james.stine@okstate.edu 15 August 2023
+// 
+// Demonstrate using SoftFloat to compute a quad-precision multiply-add (f128_mulAdd), then print results
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <quadmath.h> // GCC Quad-Math Library
+#include "softfloat.h"
+#include "softfloat_types.h"
+// sp: overlays a raw 32-bit word (v) with a native float (f) so a value can
+// be viewed either as its hexadecimal encoding or as a floating-point number.
+typedef union sp {
+  uint32_t v;
+  float f;
+} sp;
+
+// dp: same idea for 64 bits -- raw word (v) overlaid with a native double (d).
+typedef union dp {
+  uint64_t v;
+  double d;
+} dp;
+
+// qp: two 64-bit words (v[0], v[1]) overlaid with a GCC __float128 (q).
+typedef union qp {
+  uint64_t v[2];
+  __float128 q;
+} qp;
+
+
+// Print a SoftFloat single-precision value as "<msg>: 0xHHHH_LLLL = <value>".
+//   msg: label printed before the value
+//   f:   SoftFloat single-precision value to display
+void printF32 (char *msg, float32_t f) {
+  sp conv;
+  conv.v = f.v; // use union to convert between hexadecimal and floating-point views
+  uint32_t hi = conv.v >> 16;     // upper 16 bits of the encoding
+  uint32_t lo = conv.v & 0xFFFF;  // lower 16 bits of the encoding
+  printf("%s: ", msg);  // print out nicely
+  // PRIx32 keeps the conversion matched to uint32_t on every platform
+  printf("0x%04" PRIx32 "_%04" PRIx32 " = %g\n", hi, lo, conv.f);
+}
+
+// Print a SoftFloat double-precision value as
+// "<msg>: 0xHHHHHHHH_LLLLLLLL = <value>".
+//   msg: label printed before the value
+//   d:   SoftFloat double-precision value to display
+void printF64 (char *msg, float64_t d) {
+  dp conv;
+  conv.v = d.v; // use union to convert between hexadecimal and floating-point views
+  // Narrow each half to 32 bits before printing: handing a uint64_t to a
+  // %x conversion (which expects unsigned int) is undefined behavior.
+  uint32_t hi = (uint32_t)(conv.v >> 32);
+  uint32_t lo = (uint32_t)(conv.v & 0xFFFFFFFF);
+  printf("%s: ", msg);  // print out nicely
+  printf("0x%08" PRIx32 "_%08" PRIx32 " = %g\n", hi, lo, conv.d);
+}
+
+// Print a SoftFloat quad-precision value as
+// "<msg>: 0x<v[1] in hex>_<v[0] in hex> = <value>".
+//   msg: label printed before the value
+//   q:   SoftFloat quad-precision value to display
+void printF128 (char *msg, float128_t q) {
+  qp conv;
+  char text[64]; // ample room for one %1.15Qe rendering
+  conv.v[0] = q.v[0]; // use union to convert between hexadecimal and floating-point views
+  conv.v[1] = q.v[1];
+  // printf itself does not understand the Q length modifier; libquadmath's
+  // quadmath_snprintf is the routine that formats a __float128.
+  quadmath_snprintf(text, sizeof text, "%1.15Qe", conv.q);
+  printf("%s: ", msg);  // print out nicely
+  printf("0x%016" PRIx64 "_%016" PRIx64 " = %s\n", conv.v[1], conv.v[0], text);
+}
+
+// Report the five exception flags held in softfloat_exceptionFlags.
+// Bit 0 is reported as Inexact, bit 1 Underflow, bit 2 Overflow,
+// bit 3 DivideZero and bit 4 Invalid.
+void printFlags(void) {
+  int inexact    = softfloat_exceptionFlags % 2;
+  int underflow  = (softfloat_exceptionFlags >> 1) % 2;
+  int overflow   = (softfloat_exceptionFlags >> 2) % 2;
+  int divByZero  = (softfloat_exceptionFlags >> 3) % 2;
+  int invalid    = (softfloat_exceptionFlags >> 4) % 2;
+
+  printf ("Flags: Inexact %d Underflow %d Overflow %d DivideZero %d Invalid %d\n", 
+          inexact, underflow, overflow, divByZero, invalid);
+}
+
+// Put SoftFloat's global state into the configuration used by this demo.
+void softfloatInit(void) {
+    // start with no exceptions pending
+    softfloat_exceptionFlags = 0;
+
+    // RISC-V behavior for tininess
+    softfloat_detectTininess = softfloat_tininess_afterRounding;
+
+    // Available rounding modes:
+    //   RNE  softfloat_round_near_even   (selected below)
+    //   RZ   softfloat_round_minMag
+    //   RP   softfloat_round_max
+    //   RM   softfloat_round_min
+    softfloat_roundingMode = softfloat_round_near_even;
+}
+
+// Demo driver: print three quad-precision operands, then the result of
+// f128_mulAdd applied to them.
+int main() {
+  float128_t x, y, z;  // operands, each set below as two raw 64-bit words
+  float128_t r;        // value returned by f128_mulAdd
+
+  x.v[0] = 0xC3BBA082445B4836;
+  x.v[1] = 0xBFFF988ECE97DFEB;
+
+  y.v[0] = 0xFFFFFFFFFFFFFFFF;
+  y.v[1] = 0x3F8EFFFFFFFFFFFF;
+
+  z.v[0] = 0x0000000000000000;
+  z.v[1] = 0x4001000000000000;
+
+  softfloatInit();
+
+  // show the inputs first
+  printF128("X", x);
+  printF128("Y", y);
+  printF128("Z", z);
+
+  r = f128_mulAdd(x, y, z);
+
+  // blank line, then the result
+  printf("\n");
+  printF128("r", r);
+}
--- a/examples/fp/softfloat_demo/softfloat_demoSP.c
+++ b/examples/fp/softfloat_demo/softfloat_demoSP.c
@ -0,0 +1,88 @@
+// softfloat_demoSP.c
+// james.stine@okstate.edu 15 August 2023
+// 
+// Demonstrate using SoftFloat to compute a single-precision multiply-add (f32_mulAdd), then print results
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <quadmath.h> // GCC Quad-Math Library
+#include "softfloat.h"
+#include "softfloat_types.h"
+// sp: overlays a raw 32-bit word (v) with a native float (f) so a value can
+// be viewed either as its hexadecimal encoding or as a floating-point number.
+typedef union sp {
+  uint32_t v;
+  float f;
+} sp;
+
+// dp: same idea for 64 bits -- raw word (v) overlaid with a native double (d).
+typedef union dp {
+  uint64_t v;
+  double d;
+} dp;
+
+// qp: two 64-bit words (v[0], v[1]) overlaid with a GCC __float128 (q).
+typedef union qp {
+  uint64_t v[2];
+  __float128 q;
+} qp;
+
+
+// Print a SoftFloat single-precision value as "<msg>: 0xHHHH_LLLL = <value>".
+//   msg: label printed before the value
+//   f:   SoftFloat single-precision value to display
+void printF32 (char *msg, float32_t f) {
+  sp conv;
+  conv.v = f.v; // use union to convert between hexadecimal and floating-point views
+  uint32_t hi = conv.v >> 16;     // upper 16 bits of the encoding
+  uint32_t lo = conv.v & 0xFFFF;  // lower 16 bits of the encoding
+  printf("%s: ", msg);  // print out nicely
+  // PRIx32 keeps the conversion matched to uint32_t on every platform
+  printf("0x%04" PRIx32 "_%04" PRIx32 " = %g\n", hi, lo, conv.f);
+}
+
+// Print a SoftFloat double-precision value as
+// "<msg>: 0xHHHHHHHH_LLLLLLLL = <value>".
+//   msg: label printed before the value
+//   d:   SoftFloat double-precision value to display
+void printF64 (char *msg, float64_t d) {
+  dp conv;
+  conv.v = d.v; // use union to convert between hexadecimal and floating-point views
+  // Narrow each half to 32 bits before printing: handing a uint64_t to a
+  // %x conversion (which expects unsigned int) is undefined behavior.
+  uint32_t hi = (uint32_t)(conv.v >> 32);
+  uint32_t lo = (uint32_t)(conv.v & 0xFFFFFFFF);
+  printf("%s: ", msg);  // print out nicely
+  printf("0x%08" PRIx32 "_%08" PRIx32 " = %g\n", hi, lo, conv.d);
+}
+
+// Print a SoftFloat quad-precision value as
+// "<msg>: 0x<v[1] in hex>_<v[0] in hex> = <value>".
+//   msg: label printed before the value
+//   q:   SoftFloat quad-precision value to display
+void printF128 (char *msg, float128_t q) {
+  qp conv;
+  char text[64]; // ample room for one %1.15Qe rendering
+  conv.v[0] = q.v[0]; // use union to convert between hexadecimal and floating-point views
+  conv.v[1] = q.v[1];
+  // printf itself does not understand the Q length modifier; libquadmath's
+  // quadmath_snprintf is the routine that formats a __float128.
+  quadmath_snprintf(text, sizeof text, "%1.15Qe", conv.q);
+  printf("%s: ", msg);  // print out nicely
+  printf("0x%016" PRIx64 "_%016" PRIx64 " = %s\n", conv.v[1], conv.v[0], text);
+}
+
+// Report the five exception flags held in softfloat_exceptionFlags.
+// Bit 0 is reported as Inexact, bit 1 Underflow, bit 2 Overflow,
+// bit 3 DivideZero and bit 4 Invalid.
+void printFlags(void) {
+  int inexact    = softfloat_exceptionFlags % 2;
+  int underflow  = (softfloat_exceptionFlags >> 1) % 2;
+  int overflow   = (softfloat_exceptionFlags >> 2) % 2;
+  int divByZero  = (softfloat_exceptionFlags >> 3) % 2;
+  int invalid    = (softfloat_exceptionFlags >> 4) % 2;
+
+  printf ("Flags: Inexact %d Underflow %d Overflow %d DivideZero %d Invalid %d\n", 
+          inexact, underflow, overflow, divByZero, invalid);
+}
+
+// Put SoftFloat's global state into the configuration used by this demo.
+void softfloatInit(void) {
+    // start with no exceptions pending
+    softfloat_exceptionFlags = 0;
+
+    // RISC-V behavior for tininess
+    softfloat_detectTininess = softfloat_tininess_afterRounding;
+
+    // Available rounding modes:
+    //   RNE  softfloat_round_near_even   (selected below)
+    //   RZ   softfloat_round_minMag
+    //   RP   softfloat_round_max
+    //   RM   softfloat_round_min
+    softfloat_roundingMode = softfloat_round_near_even;
+}
+
+// Demo driver: print three single-precision operands, then the result of
+// f32_mulAdd applied to them.
+int main() {
+  float32_t x, y, z;  // operands, set below from raw 32-bit encodings
+  float32_t r;        // value returned by f32_mulAdd
+
+  x.v = 0xBFFF988E;
+  y.v = 0x3F8EFFFF;
+  z.v = 0x40010000;
+
+  softfloatInit();
+
+  // show the inputs first
+  printF32("X", x);
+  printF32("Y", y);
+  printF32("Z", z);
+
+  r = f32_mulAdd(x, y, z);
+
+  // blank line, then the result
+  printf("\n");
+  printF32("r", r);
+}
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@ -1000,7 +1000,7 @@ module testbenchfp;
 endmodule


-module readvectors (
+module readvectors import cvw::*; #(parameter cvw_t P) (
 		    input logic 	        clk,
 		    input logic [P.FLEN*4+7:0]  TestVector,
 		    input logic [P.FMTBITS-1:0] ModFmt,
@ -1026,7 +1026,7 @@ module readvectors (
 		    );

   localparam Q_LEN = 32'd128;
-  `include "parameter-defs.vh"   
+  //`include "parameter-defs.vh"   
   
   logic 					XEn;
   logic 					YEn;