Revert "moved old divsqrt to unusedsrc"

This reverts commit 5dd07c76bd.
2025-02-11 06:05:49 +00:00 · 2022-07-07 16:29:17 -07:00 · 2022-07-07 16:29:17 -07:00 · b1e2a1e5a1
commit b1e2a1e5a1
parent 5dd07c76bd
99 changed files with 19338 additions and 0 deletions
--- a/pipelined/src/fpu/convert_inputs.sv
+++ b/pipelined/src/fpu/convert_inputs.sv
@ -0,0 +1,74 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Floating point divider/square root top unit (Goldschmidt)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module convert_inputs(
+   input [63:0]  op1,      // First operand (A), raw bits
+   input [63:0]  op2,      // Second operand (B), raw bits
+   input [2:0]   op_type,  // Function opcode (not read by this module)
+   input 	     P,        // Precision select: operands are widened when P is 0
+
+   output [63:0] Float1,	// op1, either passed through or widened
+   output [63:0] Float2	   // op2, either passed through or widened
+);
+
+   // Each operand is either passed through untouched or has its low 32 bits
+   // re-encoded from the single-precision layout (sign [31], exponent [30:23],
+   // fraction [22:0]) into the double-precision layout (sign [63],
+   // exponent [62:52], fraction [51:0]).
+   // NOTE(review): op_type is declared as a port but never read in this module.
+   // NOTE(review): widening happens when P == 0; confirm this polarity against
+   // the caller's definition of P.
+
+   // Widening is applied to both operands whenever P is low.
+   wire widen = ~P;
+
+   // Corner cases of the 8-bit exponent field held in bits [30:23].
+   wire exp1_none = ~|op1[30:23];   // every exponent bit of op1 is 0
+   wire exp2_none = ~|op2[30:23];   // every exponent bit of op2 is 0
+   wire exp1_full =  &op1[30:23];   // every exponent bit of op1 is 1
+   wire exp2_full =  &op2[30:23];   // every exponent bit of op2 is 1
+
+   // Three bits are inserted below the exponent MSB to stretch 8 bits to 11.
+   // They are 1 when the exponent MSB is 0 and the exponent is nonzero, or when
+   // the exponent is all ones; otherwise 0.  An all-zero exponent therefore
+   // stays all zero and an all-ones exponent stays all ones.
+   wire fill1 = (~op1[30] & ~exp1_none) | exp1_full;
+   wire fill2 = (~op2[30] & ~exp2_none) | exp2_full;
+
+   // Widened images: sign, exponent MSB, fill bits, the remaining exponent and
+   // fraction bits, then 29 zero bits of padding.
+   wire [63:0] wide1 = {op1[31], op1[30], {3{fill1}}, op1[29:0], 29'b0};
+   wire [63:0] wide2 = {op2[31], op2[30], {3{fill2}}, op2[29:0], 29'b0};
+
+   // Final selection between the widened image and the untouched operand.
+   assign Float1 = widen ? wide1 : op1;
+   assign Float2 = widen ? wide2 : op2;
+
+endmodule // convert_inputs
+
--- a/pipelined/src/fpu/convert_inputs_div.sv
+++ b/pipelined/src/fpu/convert_inputs_div.sv
@ -0,0 +1,47 @@
+// convert_inputs_div
+// Prepares the two operands for the divide/square-root datapath.  When P is
+// set, the low 32 bits of each operand are re-encoded from single-precision
+// into double-precision layout; otherwise the operands pass through as-is.
+// When op_type is set, the second output mirrors the first.
+module convert_inputs_div (
+   
+   input logic [63:0]  op1,           // First operand (A)
+   input logic [63:0]  op2,           // Second operand (B)
+   input logic 	     P,             // 1: widen operands from single precision, 0: pass through
+   input logic 	     op_type,       // 1: Float2b copies Float1, 0: Float2b is derived from op2
+
+   output logic [63:0] Float1,	      // op1 after optional widening
+   output logic [63:0] Float2b	      // op2 after optional widening, or Float1 when op_type is 1
+);
+
+   // Re-encode the low 32 bits of an operand (sign [31], exponent [30:23],
+   // fraction [22:0]) into the 64-bit layout (sign [63], exponent [62:52],
+   // fraction [51:0]).  The three bits inserted under the exponent MSB are 1
+   // when the MSB is 0 and the exponent is nonzero, or when the exponent is all
+   // ones, so all-zero and all-ones exponents keep their special encoding.
+   // The 29 low-order result bits are always zero.
+   function automatic logic [63:0] widen_sp (input logic [63:0] op);
+      logic exp_none;   // exponent field is all zeros
+      logic exp_full;   // exponent field is all ones
+      logic fill;       // value replicated into the inserted exponent bits
+      begin
+         exp_none = ~|op[30:23];
+         exp_full =  &op[30:23];
+         fill     = (~op[30] & ~exp_none) | exp_full;
+         widen_sp = {op[31], op[30], {3{fill}}, op[29:0], 29'b0};
+      end
+   endfunction
+
+   // Second operand after the precision selection, before the op_type mux.
+   logic [63:0]        Float2Sel;
+
+   // Pick the widened image when P is high, the raw operand otherwise.
+   assign Float1    = P ? widen_sp(op1) : op1;
+   assign Float2Sel = P ? widen_sp(op2) : op2;
+
+   // op_type high routes Float1 to both outputs (the square-root case,
+   // which only has one operand); op_type low forwards the converted op2.
+   // Float2b is the only place Float2Sel is consumed.
+   assign Float2b = op_type ? Float1 : Float2Sel;
+
+endmodule // convert_inputs_div
--- a/pipelined/src/fpu/divconv.sv
+++ b/pipelined/src/fpu/divconv.sv
@ -0,0 +1,126 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 9/28/2021
+//
+// Purpose: Main convergence routine for floating point divider/square root unit (Goldschmidt)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divconv (   // Iteration datapath (one multiplier plus registers) for the Goldschmidt divide/square-root unit
+   input logic [52:0] 	d, n,   // d: divisor (or square-root operand) significand, n: dividend significand
+   input logic [2:0] 	sel_muxa, sel_muxb,   // operand selects for mx2 (muxa_out) and mx1 (muxb_out)
+   input logic 		sel_muxr,    // switches the multiplier operands, the added constant and the +1 to the remainder computation
+   input logic 		load_rega, load_regb, load_regc, load_regd,   // enables of the rega/regb/regc/regd flopenr instances
+   input logic 		load_regr, load_regs,   // enables of regr and of the six q/qm/qp result registers
+   input logic 		P,   // precision flag: select input of the constant muxes; when 1, bits [34:6] of the q registers are forced to 0
+   input logic 		op_type,   // when 1: n2 takes d2 and the sqrt lookup table output is used for ia_out
+   input logic 		exp_odd,    // together with op_type, shifts d right by one bit when forming d2
+   input logic 		reset,   // passed to every flopenr below
+   input logic 		clk,    // passed to every flopenr below
+		
+   output logic [59:0] 	q1, qp1, qm1,   // registered regb_out + q/qp/qm constant
+   output logic [59:0] 	q0, qp0, qm0,    // same sums formed from regb_out shifted left by one bit
+   output logic [59:0] 	rega_out, regb_out, regc_out, regd_out,   // iteration registers, each loaded from mul_out[118:59] (regc from twocmp_out)
+   output logic [119:0] regr_out   // registered copy of the full 120-bit mul_out
+);
+
+   logic [59:0] 	muxa_out, muxb_out;
+   logic [10:0] 	ia_div, ia_sqrt;   // 11-bit outputs of the two lookup modules
+   logic [59:0] 	ia_out;
+   logic [119:0] 	mul_out;
+   logic [59:0] 	q_out1, qm_out1, qp_out1;
+   logic [59:0] 	q_out0, qm_out0, qp_out0;
+   logic [59:0] 	mcand, mplier, mcand_q;   
+   logic [59:0] 	twocmp_out;
+   logic [60:0] 	three;   
+   logic [119:0] 	constant, constant2;
+   logic [59:0] 	q_const, qp_const, qm_const;
+   logic [59:0] 	d2, n2;   
+   logic 		muxr_out;
+   logic 		cout1, cout2, cout3, cout4, cout5, cout6, cout7;   // carry-outs of the adders below; never read in this module
+
+   // Check if exponent is odd for sqrt
+   // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
+   assign d2 = (exp_odd&op_type) ? {1'b0, d, 6'h0} : {d, 7'h0};   // d left-aligned in 60 bits, or shifted right by one
+   assign n2 = op_type ? d2 : {n, 7'h0};
+   
+   // IA div/sqrt: each lookup takes a 12-bit index and returns 11 bits
+   sbtm_div ia1 (d[52:41], ia_div);
+   sbtm_sqrt ia2 (d2[59:48], ia_sqrt);
+   assign ia_out = op_type ? {ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}};   // table output placed in the top 11 bits
+   
+   // Choose IA or iteration
+   mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out);
+   mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out);
+
+   // Deal with remainder if [0.5, 1) instead of [1, 2): ~n is placed at [119:67] or [118:66], chosen by q1[59]
+   mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2);
+   // Select Mcand, Remainder/Q''  
+   mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant);
+   // Select mcand - remainder should always choose q1 [1,2) because
+   //   adjustment of N in the from XX.FFFFFFF
+   mux2 #(60) mx4 (q0, q1, q1[59], mcand_q);
+   mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);   
+   mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
+   // Q*D - N (reversed but changed in rounder.v to account for sign reversal)
+   // Add ulp for subtraction in remainder
+   mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
+
+   // Constant for Q'': the two choices differ in bit position (bits 5/7 versus bits 34/36); P is the select
+   mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const);
+   mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const);
+   mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const);
+   
+   // CPA (from CSA)/Remainder addition/subtraction 
+   assign {cout1, mul_out} = (mcand*mplier) + constant + {119'b0, muxr_out};  
+   
+   // Assuming [1,2) - q1
+   assign {cout2, q_out1} = regb_out + q_const;  
+   assign {cout3, qp_out1} = regb_out + qp_const;  
+   assign {cout4, qm_out1} = regb_out + qm_const + 1'b1;  
+   // Assuming [0.5,1) - q0   
+   assign {cout5, q_out0} = {regb_out[58:0], 1'b0} + q_const;  
+   assign {cout6, qp_out0} = {regb_out[58:0], 1'b0} + qp_const;  
+   assign {cout7, qm_out0} = {regb_out[58:0], 1'b0} + qm_const + 1'b1;    
+
+   // One's complement instead of two's complement (for hw efficiency)
+   assign three = {~mul_out[118], mul_out[118], ~mul_out[117:59]};   
+   mux2 #(60) mxTC (~mul_out[118:59], three[60:1],  op_type, twocmp_out);   // op_type chooses between the plain complement and three[60:1]
+
+   // regs
+   flopenr #(60) regc (clk, reset, load_regc, twocmp_out, regc_out);
+   flopenr #(60) regb (clk, reset, load_regb, mul_out[118:59], regb_out);
+   flopenr #(60) rega (clk, reset, load_rega, mul_out[118:59], rega_out);
+   flopenr #(60) regd (clk, reset, load_regd, mul_out[118:59], regd_out);
+   flopenr #(120) regr (clk, reset, load_regr, mul_out, regr_out);
+   // Assuming [1,2): bits [59:35] kept, bits [34:6] kept only when P is 0, bits [5:0] always cleared
+   flopenr #(60) rege (clk, reset, load_regs, {q_out1[59:35], (q_out1[34:6] & {29{~P}}), 6'h0}, q1);   
+   flopenr #(60) regf (clk, reset, load_regs, {qm_out1[59:35], (qm_out1[34:6] & {29{~P}}), 6'h0}, qm1);
+   flopenr #(60) regg (clk, reset, load_regs, {qp_out1[59:35], (qp_out1[34:6] & {29{~P}}), 6'h0}, qp1);
+   // Assuming [0.5,1)
+   flopenr #(60) regh (clk, reset, load_regs, {q_out0[59:35], (q_out0[34:6] & {29{~P}}), 6'h0}, q0);
+   flopenr #(60) regj (clk, reset, load_regs, {qm_out0[59:35], (qm_out0[34:6] & {29{~P}}), 6'h0}, qm0);
+   flopenr #(60) regk (clk, reset, load_regs, {qp_out0[59:35], (qp_out0[34:6] & {29{~P}}), 6'h0}, qp0);
+   
+endmodule // divconv
--- a/pipelined/src/fpu/divconv_pipe.sv
+++ b/pipelined/src/fpu/divconv_pipe.sv
@ -0,0 +1,198 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Convergence unit for pipelined floating point divider/square root top unit (Goldschmidt)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
+		     regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk,
+		     load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, load_regp,
+		     P, op_type, exp_odd);
+   // Pipelined iteration datapath: carry-save multiply, register stage, carry-propagate add, result registers.
+   input logic [52:0]   d, n;   // d: divisor (or square-root operand) significand, n: dividend significand
+   input logic [2:0] 	sel_muxa, sel_muxb;   // operand selects for mx2 (muxa_out) and mx1 (muxb_out)
+   input logic 	        sel_muxr;   // switches the multiplier operands, the added constant and the +1 to the remainder computation
+   input logic 	        load_rega, load_regb, load_regc, load_regd;   // register enables; delayed one stage before use
+   input logic 		load_regr, load_regs;   // register enables; delayed one stage (load_regs two stages) before use
+   input logic 		load_regp;   // enable of every "Stage 1" pipeline register
+   input logic 		P;   // precision flag: select input of the constant muxes; its registered copy masks q bits [34:6]
+   input logic 		op_type;   // when 1: n2 takes d2 and the sqrt lookup table output is used for ia_out
+   input logic 		exp_odd;   // together with op_type, shifts d right by one bit when forming d2
+   input logic 	        reset;   // passed to every flopenr below
+   input logic 	        clk;   // passed to every flopenr below
+   
+   output logic [59:0] 	q1, qp1, qm1;   // registered regb_out + q/qp/qm constant
+   output logic [59:0] 	q0, qp0, qm0;   // same sums formed from regb_out shifted left by one bit
+   output logic [59:0] 	rega_out, regb_out, regc_out, regd_out;   // iteration registers
+   output logic [119:0] regr_out;   // registered copy of the full 120-bit mul_out
+   
+   supply1 		vdd;   // NOTE(review): vdd is not referenced anywhere in this module
+   supply0 		vss;   // constant 0, used as the pad bit when forming d2
+
+   logic [59:0] 	muxa_out, muxb_out;
+   logic 		muxr_out;
+   logic [10:0] 	ia_div, ia_sqrt;   // 11-bit outputs of the two lookup modules
+   logic [59:0] 	ia_out;
+   logic [119:0] 	mul_out;
+   logic [59:0] 	q_out1, qm_out1, qp_out1;
+   logic [59:0] 	q_out0, qm_out0, qp_out0;
+   logic [59:0] 	mcand, mplier, mcand_q;   
+   logic [59:0] 	twocmp_out;
+   logic [60:0] 	three;   
+   logic [119:0] 	Carry, Carry2;   // carry vectors out of the multiplier and out of csa1
+   logic [119:0] 	Sum, Sum2;   // sum vectors out of the multiplier and out of csa1
+   logic [119:0] 	constant, constant2;
+   logic [59:0] 	q_const, qp_const, qm_const;
+   logic [59:0] 	d2, n2;   
+   logic [11:0] 	d3;   // NOTE(review): d3 is declared but never driven or read in this module
+
+   // Check if exponent is odd for sqrt
+   // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
+   assign d2 = (exp_odd&op_type) ? {vss, d, 6'h0} : {d, 7'h0};   // d left-aligned in 60 bits, or shifted right by one
+   assign n2 = op_type ? d2 : {n, 7'h0};
+   
+   // IA div/sqrt: each lookup takes a 12-bit index and returns 11 bits
+   sbtm_div ia1 (d[52:41], ia_div);
+   sbtm_sqrt ia2 (d2[59:48], ia_sqrt);
+   assign ia_out = op_type ? {ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}};   // table output placed in the top 11 bits
+   
+   // Choose IA or iteration
+   mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out);
+   mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out);
+
+   // Deal with remainder if [0.5, 1) instead of [1, 2): ~n is placed at [119:67] or [118:66], chosen by q1[59]
+   mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2);
+   // Select Mcand, Remainder/Q''  
+   mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant);
+   // Select mcand - remainder should always choose q1 [1,2) because
+   //   adjustment of N in the from XX.FFFFFFF
+   mux2 #(60) mx4 (q0, q1, q1[59], mcand_q);
+   mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);   
+   mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
+   // R4 Booth TDM multiplier (carry/save)
+   redundantmul #(60) bigmul(.a(mcand), .b(mplier), .out0(Sum), .out1(Carry));   
+   // Q*D - N (reversed but changed in rounder.v to account for sign reversal)
+   csa #(120) csa1 (Sum, Carry, constant, 1'b0, Sum2, Carry2);
+   // Add ulp for subtraction in remainder
+   mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
+
+   // Constant for Q'': the two choices differ in bit position (bits 5/7 versus bits 34/36); P is the select
+   mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const);
+   mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const);
+   mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const);
+
+   logic [119:0] 	Sum_pipe;   // Sum2 after the stage-1 register
+   logic [119:0] 	Carry_pipe;   // Carry2 after the stage-1 register
+   logic 		muxr_pipe;   
+   logic 		rega_pipe;   // load_rega delayed by the stage-1 register; the *_pipe enables below follow the same pattern
+   logic 		regb_pipe;
+   logic 		regc_pipe;
+   logic 		regd_pipe;
+   logic 		regs_pipe;
+   logic 		regs_pipe2;   // second-stage copy of the load_regs enable
+   logic 		regr_pipe;
+   logic 		P_pipe;
+   logic 		op_type_pipe;
+   logic [59:0] 	q_const_pipe;
+   logic [59:0] 	qm_const_pipe;
+   logic [59:0] 	qp_const_pipe;
+   logic [59:0] 	q_const_pipe2;   // NOTE(review): the three *_const_pipe2 signals are written below but never read in this module
+   logic [59:0] 	qm_const_pipe2;
+   logic [59:0] 	qp_const_pipe2;      
+   
+   // Stage 1: registers between the carry-save multiplier and the final adder, all enabled by load_regp
+   flopenr #(120) regp1 (clk, reset, load_regp, Sum2, Sum_pipe);
+   flopenr #(120) regp2 (clk, reset, load_regp, Carry2, Carry_pipe);
+   flopenr #(1) regp3 (clk, reset, load_regp, muxr_out, muxr_pipe);
+   // Control signals and constants travel through the same stage as the data
+   flopenr #(1) regp4 (clk, reset, load_regp, load_rega, rega_pipe);
+   flopenr #(1) regp5 (clk, reset, load_regp, load_regb, regb_pipe);
+   flopenr #(1) regp6 (clk, reset, load_regp, load_regc, regc_pipe);
+   flopenr #(1) regp7 (clk, reset, load_regp, load_regd, regd_pipe);
+   flopenr #(1) regp8 (clk, reset, load_regp, load_regs, regs_pipe);
+   flopenr #(1) regp9 (clk, reset, load_regp, load_regr, regr_pipe);
+   flopenr #(1) regpA (clk, reset, load_regp, P, P_pipe);
+   flopenr #(1) regpB (clk, reset, load_regp, op_type, op_type_pipe);
+   flopenr #(60) regpC (clk, reset, load_regp, q_const, q_const_pipe);
+   flopenr #(60) regpD (clk, reset, load_regp, qp_const, qp_const_pipe);
+   flopenr #(60) regpE (clk, reset, load_regp, qm_const, qm_const_pipe);
+
+   // CPA (from CSA)/Remainder addition/subtraction
+   assign mul_out = Sum_pipe + Carry_pipe + {119'h0, muxr_pipe};   
+   // One's complement instead of two's complement (for hw efficiency)
+   assign three = {~mul_out[118] , mul_out[118], ~mul_out[117:59]};   
+   mux2 #(60) mxTC (~mul_out[118:59], three[60:1],  op_type_pipe, twocmp_out);   // op_type_pipe chooses between the plain complement and three[60:1]
+
+   // Stage 2: iteration registers, enabled by the stage-1 copies of the load signals
+   flopenr #(60) regc (clk, reset, regc_pipe, twocmp_out, regc_out);
+   flopenr #(60) regb (clk, reset, regb_pipe, mul_out[118:59], regb_out);
+   flopenr #(60) rega (clk, reset, rega_pipe, mul_out[118:59], rega_out);
+   flopenr #(60) regd (clk, reset, regd_pipe, mul_out[118:59], regd_out);
+   flopenr #(120) regr (clk, reset, regr_pipe, mul_out, regr_out);   
+   flopenr #(1) regl (clk, reset, regs_pipe, regs_pipe, regs_pipe2);   // NOTE(review): regs_pipe is passed as both the 3rd and 4th argument (enable and data, if flopenr follows the order used elsewhere here) - confirm intent
+   flopenr #(60) regm (clk, reset, regs_pipe, q_const_pipe, q_const_pipe2);
+   flopenr #(60) regn (clk, reset, regs_pipe, qp_const_pipe, qp_const_pipe2);
+   flopenr #(60) rego (clk, reset, regs_pipe, qm_const_pipe, qm_const_pipe2);   
+
+   // Assuming [1,2) - q1   NOTE(review): these sums use the unregistered q/qp/qm_const, not the *_pipe2 copies - confirm
+   assign q_out1 = regb_out + q_const;  
+   assign qp_out1 = regb_out + qp_const;  
+   assign qm_out1 = regb_out + qm_const + 1'b1;  
+   // Assuming [0.5,1) - q0   
+   assign q_out0 = {regb_out[58:0], 1'b0} + q_const;  
+   assign qp_out0 = {regb_out[58:0], 1'b0} + qp_const;  
+   assign qm_out0 = {regb_out[58:0], 1'b0} + qm_const + 1'b1;    
+
+   // Stage 3: result registers enabled by regs_pipe2; bits [34:6] are kept only when P_pipe is 0, bits [5:0] always cleared
+   // Assuming [1,2)
+   flopenr #(60) rege (clk, reset, regs_pipe2, {q_out1[59:35], (q_out1[34:6] & {29{~P_pipe}}), 6'h0}, q1);   
+   flopenr #(60) regf (clk, reset, regs_pipe2, {qm_out1[59:35], (qm_out1[34:6] & {29{~P_pipe}}), 6'h0}, qm1);
+   flopenr #(60) regg (clk, reset, regs_pipe2, {qp_out1[59:35], (qp_out1[34:6] & {29{~P_pipe}}), 6'h0}, qp1);
+   // Assuming [0.5,1)
+   flopenr #(60) regh (clk, reset, regs_pipe2, {q_out0[59:35], (q_out0[34:6] & {29{~P_pipe}}), 6'h0}, q0);
+   flopenr #(60) regj (clk, reset, regs_pipe2, {qm_out0[59:35], (qm_out0[34:6] & {29{~P_pipe}}), 6'h0}, qm0);
+   flopenr #(60) regk (clk, reset, regs_pipe2, {qp_out0[59:35], (qp_out0[34:6] & {29{~P_pipe}}), 6'h0}, qp0);
+   
+endmodule // divconv_pipe
+
+// *** rewrote behaviorally dh 5 Jan 2021 for speed
+// module csa #(parameter WIDTH=8) (
+//    input logic [WIDTH-1:0] a, b, c,
+// 	output logic [WIDTH-1:0] sum, carry);
+
+//    assign sum = a ^ b ^ c;
+//    assign carry = (a & (b | c)) | (b & c);
+// /*
+//    logic [WIDTH:0] 					  carry_temp;   
+//    genvar 						  i;
+//        for (i=0;i<WIDTH;i=i+1) begin : genbit
+// 	    fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
+// 	  end
+//    assign carry = {carry_temp[WIDTH-1:1], 1'b0};     
+// */
+// endmodule // csa
--- a/pipelined/src/fpu/exception_div.sv
+++ b/pipelined/src/fpu/exception_div.sv
@ -0,0 +1,82 @@
+// Exception logic for the floating point divide/square-root unit.  Operands are
+// examined as double-precision values: sign [63], exponent [62:52], fraction [51:0].
+module exception_div (
+
+   input logic [63:0] A,		// 1st input operand (op1)
+   input logic [63:0] B,		// 2nd input operand (op2)
+   input logic 	    op_type,   // Operation: 0 = divide (A/B), 1 = square root of A
+   output logic [2:0] Ztype,		// Indicates type of result (Z), encoded per the table below
+   output logic       Invalid	// Invalid operation exception
+);
+   
+   logic 	      AzeroM;	 	// '1' if the mantissa of A is zero
+   logic 	      BzeroM;		// '1' if the mantissa of B is zero
+   logic 	      AzeroE;	 	// '1' if the exponent of A is zero
+   logic 	      BzeroE;		// '1' if the exponent of B is zero
+   logic 	      AonesE;	 	// '1' if the exponent of A is all ones
+   logic 	      BonesE;		// '1' if the exponent of B is all ones
+   logic 	      AInf;	 	// '1' if A is infinite
+   logic 	      BInf;	 	// '1' if B is infinite
+   logic 	      AZero;	 	// '1' if A is 0
+   logic 	      BZero;	 	// '1' if B is 0
+   logic 	      ANaN;	 	// '1' if A is a not-a-number
+   logic 	      BNaN; 		// '1' if B is a not-a-number
+   logic 	      ASNaN;	 	// '1' if A is a signalling not-a-number
+   logic 	      BSNaN;	 	// '1' if B is a signalling not-a-number
+   logic 	      ZSNaN;	 	// '1' if result Z is a quiet NaN
+   logic 	      ZInf;	 	// '1' if result Z is an infinity
+   logic 	      Zero;             // '1' if result is zero
+   logic              NegSqrt;          // '1' if sqrt and operand is negative   
+   
+   //***take this module out and add more registers or just recalculate it all
+   // Determine if mantissas are all zeros
+   assign AzeroM = (A[51:0] == 52'h0);
+   assign BzeroM = (B[51:0] == 52'h0);
+
+   // Determine if exponents are all ones or all zeros (reduction over the 11 exponent bits)
+   assign AonesE = &A[62:52];
+   assign BonesE = &B[62:52];
+   assign AzeroE = ~|A[62:52];
+   assign BzeroE = ~|B[62:52];
+
+   // Determine special cases. A NaN is signalling when its fraction MSB (bit 51, the quiet bit) is 0.
+   assign AInf = AonesE & AzeroM;
+   assign BInf = BonesE & BzeroM;
+   assign ANaN = AonesE & ~AzeroM;
+   assign BNaN = BonesE & ~BzeroM;
+   assign ASNaN = ANaN & ~A[51];
+   assign BSNaN = BNaN & ~B[51];      // derived from B (previously a copy of the A expression)
+   assign AZero = AzeroE & AzeroM;
+   assign BZero = BzeroE & BzeroM;    // zero needs a zero fraction too, matching AZero
+
+   // Is NaN if operand is negative and its a sqrt (negative zero is excluded)
+   assign NegSqrt = (A[63] & op_type & ~AZero);
+
+   // An "Invalid Operation" exception occurs if (A or B is a signalling NaN),
+   // or (divide with A and B both Infinite or both Zero), or (sqrt of a negative nonzero A)
+   assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | 
+		    NegSqrt;
+
+   // The result is a quiet NaN if (an "Invalid Operation" exception occurs) 
+   // or (A is a NaN) or (B is a NaN).
+   assign ZSNaN = Invalid | ANaN | BNaN;
+
+   //  The result is zero: divide with A zero or B infinite, or sqrt of a zero A
+   assign Zero = (AZero | BInf)&~op_type | AZero&op_type;   
+
+   // The result is Inf if (divide with A Inf or B zero, or sqrt of an
+   // infinite A) and (the result is not a quiet NaN).  
+   assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN;   
+
+   // Set the type of the result as follows:
+   // Ztype	Result 
+   //  000     Normal
+   //  010     Infinity
+   //  011     Zero (Zero and ZInf both set also encodes as 011)
+   //  111     NaN
+   //  (110 is never produced by the equations below)
+   assign Ztype[2] = (ZSNaN);
+   assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf);
+   assign Ztype[0] = (ZSNaN) | (Zero);
+   
+endmodule // exception_div
--- a/pipelined/src/fpu/fpdiv.sv
+++ b/pipelined/src/fpu/fpdiv.sv
@ -0,0 +1,132 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Floating point divider/square root top unit (Goldschmidt)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// `timescale 1ps/1ps
+module fpdiv (   // Top level of the divide/square-root unit: exception check, exponent math, divconv datapath, FSM and rounder
+  input logic 	      clk,
+  input logic 	      reset,
+  input logic 	      start,   // passed to the control FSM
+  input logic [63:0]  op1,   // first operand; also used as the second operand when op_type is 1
+  input logic [63:0]  op2,   // second operand (divide only)
+  input logic [1:0]   rm,   // passed to the rounder
+  input logic 	      op_type,   // fdiv=0, fsqrt=1
+  input logic 	      P,   // precision flag, passed to divconv and the rounder
+  input logic 	      OvEn,   // passed to the rounder
+  input logic 	      UnEn,   // passed to the rounder
+  input logic 	      XNaNQ,   // XNaNQ..YInfQ: operand classification flags, forwarded unchanged to the rounder
+  input logic 	      YNaNQ,
+  input logic 	      XZeroQ,
+  input logic 	      YZeroQ,
+  input logic 	      XInfQ,
+  input logic 	      YInfQ, 
+
+  output logic 	      done,   // driven by the FSM; also the enable of the two output registers
+  output logic 	      FDivBusyE,   // divBusy output of the FSM
+  output logic [63:0] AS_Result,   // registered rounder Result
+  output logic [4:0]  Flags);   // registered rounder Flags
+   
+   logic [63:0]       Float1; 
+   logic [63:0]       Float2;
+   
+   logic [12:0]       exp1, exp2, expF;
+   logic [12:0]       exp_diff, bias;
+   logic [13:0]       exp_sqrt;
+   logic [63:0]       Result;   
+   logic [52:0]       mantissaA;
+   logic [52:0]       mantissaB; 
+   
+   logic [2:0] 	      sel_inv;   // Ztype output of exception_div
+   logic 	      Invalid;
+   logic [4:0] 	      FlagsIn;   	
+   logic 	      signResult;      
+   
+   logic [59:0]       q1, qm1, qp1, q0, qm0, qp0;
+   logic [59:0]       rega_out, regb_out, regc_out, regd_out;
+   logic [119:0]      regr_out;
+   logic [2:0] 	      sel_muxa, sel_muxb;
+   logic 	      sel_muxr;   
+   logic 	      load_rega, load_regb, load_regc, load_regd, load_regr;
+   
+   logic 	      load_regs;
+   logic 	      exp_cout1, exp_cout2;   // top bits of the exponent sums; not read in this module
+   logic 	      exp_odd, open;   // open: sink for an unused bit of the exponent difference
+   
+   //  op_type : fdiv=0, fsqrt=1
+   assign Float1 = op1;
+   assign Float2 = op_type ? op1 : op2;   
+   
+   // Exception detection
+   exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
+   
+   // Determine Sign/Mantissa (a leading 1 is prepended to each 52-bit fraction unconditionally)
+   assign signResult = (Float1[63]^Float2[63]);   // for sqrt Float2 equals Float1, so this is 0
+   assign mantissaA = {1'b1, Float1[51:0]};
+   assign mantissaB = {1'b1, Float2[51:0]};
+   // Perform Exponent Subtraction - expA - expB + Bias   
+   assign exp1 = {2'b0, Float1[62:52]};
+   assign exp2 = {2'b0, Float2[62:52]};
+   assign bias = {3'h0, 10'h3FF};
+   // Divide exponent
+   assign {exp_cout1, open, exp_diff} = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias};
+   
+   // Sqrt exponent (check if exponent is odd): exp_odd is 1 when the biased exponent LSB is 0
+   assign exp_odd = Float1[52] ? 1'b0 : 1'b1;
+   assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd};
+   // Choose correct exponent (the sqrt sum is halved by dropping its LSB)
+   assign expF = op_type ? exp_sqrt[13:1] : exp_diff;   
+   
+   // Main Goldschmidt/Division Routine   
+   divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out,
+		  .regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr, 
+		  .reset, .clk,  .load_rega, .load_regb, .load_regc, .load_regd,
+		  .load_regr, .load_regs, .P, .op_type, .exp_odd);
+   
+   // FSM : control divider   
+   fsm_fpdiv control (.clk, .reset, .start, .op_type,
+		      .done, .load_rega, .load_regb, .load_regc, .load_regd, 
+		      .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, 
+		      .divBusy(FDivBusyE));
+   
+   // Round the mantissa to a 52-bit value, with the leading one
+   // removed. The rounding units also handles special cases and 
+   // set the exception flags.   
+   rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), 
+   		       .sel_inv, .Invalid, .SignR(signResult),
+		       .Float1(op1), .Float2(op2),   // the rounder receives the raw op1/op2, not the local Float2
+		       .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, 
+		       .XInfQ, .YInfQ, .op_type,		       
+		       .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, 
+                       .Result, .Flags(FlagsIn));
+   
+   // Store the final result and the exception flags in registers.
+   flopenr #(64) rega (clk, reset, done, Result, AS_Result);  
+   flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);   
+   
+endmodule // fpdiv
+
--- a/pipelined/src/fpu/fpdiv_pipe.sv
+++ b/pipelined/src/fpu/fpdiv_pipe.sv
@ -0,0 +1,170 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Floating point divider/square root top unit pipelined version (Goldschmidt)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// fpdiv_pipe
+//
+// Pipelined top level of the Goldschmidt divide / square-root unit.
+//   * The operands are unpacked combinationally: result sign, mantissas with
+//     a leading one prepended, and the result exponent.
+//   * The derived values are captured in always-enabled registers (names
+//     ending in "1") before they reach the datapath and the rounder.
+//   * divconv_pipe is the main Goldschmidt routine, sequenced by
+//     fsm_fpdiv_pipe.
+//   * rounder_div produces Result/FlagsIn, which are stored into
+//     AS_Result/Flags when done is asserted.
+//
+// NOTE(review): P1 and start1 are registered below but never consumed; the
+// datapath, FSM and rounder all receive the unregistered P/start. Confirm
+// whether that is intentional.
+// NOTE(review): rounder_div receives op1/op2 directly while its other
+// operand-derived inputs are the registered copies. Confirm the caller holds
+// op1/op2 stable for the whole operation.
+module fpdiv_pipe (
+  input logic         clk,
+  input logic         reset,
+  input logic         start,
+  input logic [63:0]  op1,
+  input logic [63:0]  op2,
+  input logic [1:0]   rm,
+  input logic         op_type,      // fdiv=0, fsqrt=1
+  input logic         P,
+  input logic         OvEn,         // unused here: rounder_div.OvEn is tied to 1'b0
+  input logic         UnEn,         // unused here: rounder_div.UnEn is tied to 1'b0
+  input logic         XNaNQ,
+  input logic         YNaNQ,
+  input logic         XZeroQ,
+  input logic         YZeroQ,
+  input logic         XInfQ,
+  input logic         YInfQ,
+
+  output logic        done,
+  output logic        FDivBusyE,
+  output logic        load_preload,
+  output logic [63:0] AS_Result,
+  output logic [4:0]  Flags);
+
+   // Operands as seen by the unit (for sqrt both are op1)
+   logic [63:0]       Float1;
+   logic [63:0]       Float2;
+
+   // Exponent arithmetic
+   logic [12:0]       exp1, exp2, expF;
+   logic [14:0]       exp_pre_diff;
+   logic [12:0]       exp_diff, bias;
+   logic [13:0]       exp_sqrt;
+   logic              exp_odd;
+
+   // Unpacked mantissas, sign and exception information
+   logic [52:0]       mantissaA;
+   logic [52:0]       mantissaB;
+   logic              signResult;
+   logic [2:0]        sel_inv;
+   logic              Invalid;
+
+   // Rounder outputs before the final result registers
+   logic [63:0]       Result;
+   logic [4:0]        FlagsIn;
+
+   // Datapath <-> rounder / FSM interconnect
+   logic [59:0]       q1, qm1, qp1, q0, qm0, qp0;
+   logic [59:0]       rega_out, regb_out, regc_out, regd_out;
+   logic [119:0]      regr_out;
+   logic [2:0]        sel_muxa, sel_muxb;
+   logic              sel_muxr;
+   logic              load_rega, load_regb, load_regc, load_regd, load_regr;
+   logic              load_regp, load_regs;
+
+   // Registered copies of the first-stage values
+   logic              exp_odd1;
+   logic              start1;
+   logic              P1;
+   logic              op_type1;
+   logic [12:0]       expF1;
+   logic [52:0]       mantissaA1;
+   logic [52:0]       mantissaB1;
+   logic [2:0]        sel_inv1;
+   logic              signResult1;
+   logic              Invalid1;
+
+   //  op_type : fdiv=0, fsqrt=1
+   assign Float1 = op1;
+   assign Float2 = op_type ? op1 : op2;
+
+   // Exception detection
+   exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
+
+   // Determine Sign/Mantissa
+   // The sign is the XOR of the operand signs for divide and 0 for sqrt.
+   assign signResult = ((Float1[63]^Float2[63])&~op_type);
+   // A leading one is always prepended to the 52 fraction bits.
+   assign mantissaA = {1'b1, Float1[51:0]};
+   assign mantissaB = {1'b1, Float2[51:0]};
+
+   // Perform Exponent Subtraction - expA - expB + Bias
+   assign exp1 = {2'b0, Float1[62:52]};
+   assign exp2 = {2'b0, Float2[62:52]};
+   // bias : DP = 2^{11-1}-1 = 1023
+   assign bias = {3'h0, 10'h3FF};
+   // Divide exponent (computed at 15 bits, low 13 bits kept)
+   assign exp_pre_diff = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias};
+   assign exp_diff = exp_pre_diff[12:0];
+
+   // Sqrt exponent (check if exponent is odd)
+   // exp_odd is the inverse of the exponent field's LSB; the sum below is
+   // halved by taking bits [13:1].
+   assign exp_odd = ~Float1[52];
+   assign exp_sqrt = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd};
+   // Choose correct exponent
+   assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
+
+   // Pipeline registers (enable tied high, so they update every cycle)
+   flopenr #(1) rega (clk, reset, 1'b1, exp_odd, exp_odd1);
+   flopenr #(1) regb (clk, reset, 1'b1, P, P1);
+   flopenr #(1) regc (clk, reset, 1'b1, op_type, op_type1);
+   flopenr #(13) regd (clk, reset, 1'b1, expF, expF1);
+   flopenr #(53) rege (clk, reset, 1'b1, mantissaA, mantissaA1);
+   flopenr #(53) regf (clk, reset, 1'b1, mantissaB, mantissaB1);
+   flopenr #(1) regg (clk, reset, 1'b1, start, start1);
+   flopenr #(3) regh (clk, reset, 1'b1, sel_inv, sel_inv1);
+   flopenr #(1) regj (clk, reset, 1'b1, signResult, signResult1);
+   flopenr #(1) regk (clk, reset, 1'b1, Invalid, Invalid1);
+
+   // Main Goldschmidt/Division Routine
+   divconv_pipe goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0,
+                       .rega_out, .regb_out, .regc_out, .regd_out,
+                       .regr_out, .d(mantissaB1), .n(mantissaA1),
+                       .sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk,
+                       .load_rega, .load_regb, .load_regc, .load_regd,
+                       .load_regr, .load_regs, .load_regp,
+                       .P(P), .op_type(op_type1), .exp_odd(exp_odd1));
+
+   // FSM : control divider
+   fsm_fpdiv_pipe control (.clk, .reset, .start(start), .op_type(op_type1), .P(P),
+                           .done, .load_rega, .load_regb, .load_regc, .load_regd,
+                           .load_regr, .load_regs, .load_regp, .load_preload,
+                           .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE));
+
+   // Round the mantissa to a 52-bit value, with the leading one
+   // removed. The rounding unit also handles special cases and
+   // sets the exception flags.
+   rounder_div round1 (.rm, .P(P), .OvEn(1'b0), .UnEn(1'b0), .exp_diff(expF1),
+                       .sel_inv(sel_inv1), .Invalid(Invalid1), .SignR(signResult1),
+                       .Float1(op1), .Float2(op2),
+                       .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ,
+                       .XInfQ, .YInfQ, .op_type(op_type1),
+                       .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
+                       .Result, .Flags(FlagsIn));
+
+   // Store the final result and the exception flags in registers.
+   flopenr #(64) regl (clk, reset, done, Result, AS_Result);
+   flopenr #(5) regn (clk, reset, done, FlagsIn, Flags);
+
+endmodule // fpdiv_pipe
+
--- a/pipelined/src/fpu/fpudivsqrtrecur.sv
+++ b/pipelined/src/fpu/fpudivsqrtrecur.sv
@ -0,0 +1,74 @@
+///////////////////////////////////////////
+//
+// Written: David Harris
+// Modified: 11 September 2021
+//
+// Purpose: Recurrence-based SRT Division and Square Root
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// fpudivsqrtrecur
+//
+// Special-case wrapper for the recurrence-based (SRT) divide / square-root
+// unit. It selects between fixed special-case results (NaN, infinity, zero)
+// and the recurrence result FDivSqrtRecurRes.
+//
+// NOTE(review): this module is still a stub. FDivSqrtRecurRes is never
+// driven here, and the outputs FDviSqrtBusy, FDivSqrtDone and FDivSqrtFlgM
+// are left unassigned.
+// NOTE(review): the port name FDviSqrtBusy looks like a typo for
+// FDivSqrtBusy; it is kept because renaming would break instantiations.
+module fpudivsqrtrecur (
+    input logic                 clk,
+    input logic                 reset,
+    input logic                 FlushM,     // flush the memory stage
+    input logic                 StallM,     // stall memory stage
+    input logic                 FDivSqrtStart, // start a computation
+    input logic                 FmtE, // precision 1 = double 0 = single
+    input logic                 FDivE, FSqrtE,
+    input logic  [2:0]          FrmM,               // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic                 XSgnE, YSgnE,    // input signs - execute stage
+    input logic [`NE-1:0]       XExpE, YExpE,    // input exponents - execute stage
+    input logic [`NF:0]         XManE, YManE,    // input mantissa - execute stage
+    input logic                 XDenormE, YDenormE, // is denorm
+    input logic                 XZeroE, YZeroE,     // is zero - execute stage
+    input logic                 XNaNE, YNaNE,        // is NaN
+    input logic                 XSNaNE, YSNaNE,     // is signaling NaN
+    input logic                 XInfE, YInfE, ZInfE,        // is infinity
+    input logic [10:0]          BiasE,      // bias (max exponent/2) ***parameterize in unpacking unit
+    output logic                FDviSqrtBusy, FDivSqrtDone, // currently occupied, or done with operation
+    output logic [`FLEN-1:0]    FDivSqrtResM,    // result
+    output logic [4:0]          FDivSqrtFlgM   // flags
+  );
+
+  logic FDivSqrtResSgn;
+  logic [`FLEN-1:0] FDivSqrtRecurRes;
+
+  // Radix-2 SRT Division and Square Root
+
+  // Special Cases
+  // *** shift to handle denorms in hardware
+
+  assign FDivSqrtResSgn = FDivE & (XSgnE ^ YSgnE); // Sign is negative for division if inputs have opposite signs
+
+  // Result selection, highest priority first.
+  //   NaN      : sqrt of a negative-signed input, 0/0, or a NaN operand.
+  //              Encoded as the canonical quiet NaN: sign 0, exponent all
+  //              ones, fraction MSB set.
+  //   infinity : divide by zero, or X is infinity. The exponent must be all
+  //              ones; the previous (`NE)'(1) produced exponent value 1.
+  //   zero     : divide by infinity.
+  // NOTE(review): inf/inf falls into the infinity branch, and sqrt(-0) into
+  // the NaN branch because only XSgnE is tested; IEEE 754 gives NaN and -0
+  // respectively. Left unchanged pending the remaining *** items below.
+  always_comb begin
+      if (FSqrtE & XSgnE | FDivE & XZeroE & YZeroE | XNaNE | FDivE & YNaNE)
+          FDivSqrtResM = {1'b0, {(`NE){1'b1}}, 1'b1, {(`NF-1){1'b0}}}; // canonical quiet NaN
+      else if (FDivE & YZeroE | XInfE)
+          FDivSqrtResM = {FDivSqrtResSgn, {(`NE){1'b1}}, {(`NF){1'b0}}}; // infinity
+      else if (FDivE & YInfE)
+          FDivSqrtResM = {FDivSqrtResSgn, {(`NE){1'b0}}, {(`NF){1'b0}}}; // zero
+      else
+          FDivSqrtResM = FDivSqrtRecurRes;
+  end
+
+  // *** handle early termination in the special cases
+  // *** handle signaling NANs
+endmodule
--- a/pipelined/src/fpu/fpudivsqrtrecurcore.sv
+++ b/pipelined/src/fpu/fpudivsqrtrecurcore.sv
@ -0,0 +1,105 @@
+///////////////////////////////////////////
+//
+// Written: David Harris
+// Modified: 11 September 2021
+//
+// Purpose: Recurrence-based SRT Division and Square Root
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// Bit counts:
+// Inputs are originally normalized floating point numbers with NF fractional bits and a leading 1 integer bit
+// x is right shifted by up to 2 to be in the range of 1/4 <= x < 1/2 for divide, 1/4 <= x < 1 for sqrt
+// Hence, x now has NF+2 fractional bits and 0 integer bits
+// d is right shifted by 1 to be in the range of 1/2 <= d < 1.  It thus has NF+1 fractional bits and 0 integer bits
+// q is eventually in the range of 1/4 < q < 1 and hence needs NF+2 bits to keep NF bits when normalized, plus some*** more bits for rounding
+// The partial 
+
+/*  
+module fpudivsqrtrecurcore (
+    input logic                 clk,
+    input logic                 reset,
+    input logic                 start, // start a computation
+    input logic                 busy, // computation running
+    input logic                 fmt, // precision 1 = double 0 = single
+    input logic [`NF+1:0]         x,     // in range 1/4 <= x < 1/2 for divide, 1/4 <=x < 1 for sqrt
+    input logic [`NF+1:0]         din,    // in range 1/2 <= d < 1 for divide
+     input logic                 FDiv, FSqrt, // *** not yet used
+ 	output logic [`FLEN-1:0]    FDivSqrtRecurRes    // result
+  );
+
+  assign FDivSqrtRecurRes = 0;
+ 
+  logic [***] d, ws, wsout, wsnext, wc, wcout, wcnext;
+  logic [1:0] q; // 00 = 0, 01 = 1, 10 = -1
+
+  // Radix-2 SRT Division
+  
+  // registers for divisor and partial remainder
+  flopen #(NF+1) dreg(clk, start, din, d);
+  mux2 #(NF+1) wsmux(wsout, x, start, wsnext);
+  flopen #(NF+1) wsreg(clk, busy, wsnext, ws);
+  mux2 #(NF+1) wcmux(wcout, 0, start, wcnext);
+  flopen #(NF+1) wcreg(clk, busy, wcnext, wc);
+
+  // quotient selection
+  qsel qsel(ws[***4bits], wc[***], q);
+  
+  // partial remainder update
+  always_comb begin // select -d * q to add to partial remainder
+      if      (q[1]) dq = d;
+      else if (q[0]) dq = ~d;
+      else           dq = 0;
+  end
+  csa #(***) csa(ws, wc, dq, q[1], wsout, wcout);
+
+
+endmodule
+*/
+
+/*
+module csa #(parameter N=4) (
+    input logic [N-1:0] sin, cin, ain,
+    input logic carry,
+    output logic [N-1:0] sum, cout
+);
+
+    logic [N-1:0] c;
+
+    assign c = {cin[N-2:0], carry}; // shift carries left and inject optional 1 into lsb
+    assign sum = sin ^ ain ^ c;
+    assign cout = sin & ain | sin & c | ain & c;
+endmodule
+*/
+
+// qsel: placeholder for the radix-2 SRT quotient-digit selection logic.
+// The module body is empty, so output q is currently undriven.
+// NOTE(review): the commented-out fpudivsqrtrecurcore above instantiates
+// this as qsel(ws[...], wc[...], q), i.e. ws first, while the ports here are
+// declared wc first. Confirm the intended order before enabling that code.
+module qsel( // radix 2 SRT division quotient selection
+    input logic [3:0] wc, ws,
+    output logic [1:0] q
+);
+
+endmodule
+
+
+
--- a/pipelined/src/fpu/fsm_fpdiv.sv
+++ b/pipelined/src/fpu/fsm_fpdiv.sv
@ -0,0 +1,537 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 9/28/2021
+//
+// Purpose: FSM for floating point divider/square root unit (Goldschmidt)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// fsm_fpdiv: control FSM for the Goldschmidt divide / square-root datapath.
+//
+// From S0, start with op_type = 0 walks S1..S6, S8..S11 (divide) and start
+// with op_type = 1 walks S13..S27 (square root). Each state drives one set
+// of register load enables and mux selects.
+//   done    : high only in S10 (divide) and S26 (square root).
+//   divBusy : high from the cycle start is accepted in S0 up to and
+//             including the state before done.
+//
+// The state register uses nonblocking assignments so that downstream flops
+// clocked on the same edge sample the pre-edge values of the decoded
+// outputs instead of racing with the state update.
+//
+// NOTE(review): S6 jumps straight to S8, so S7 is unreachable. This is kept
+// exactly as found; confirm whether skipping S7 is intentional.
+// NOTE(review): S12 and S28..S30 have no case item and take the default
+// outputs (all zero, next state S0).
+module fsm_fpdiv (
+   input logic        clk,
+   input logic        reset,
+   input logic        start,
+   input logic        op_type,
+   output logic       done,
+   output logic       load_rega,
+   output logic       load_regb,
+   output logic       load_regc,
+   output logic       load_regd,
+   output logic       load_regr,
+   output logic       load_regs,
+   output logic [2:0] sel_muxa,
+   output logic [2:0] sel_muxb,
+   output logic       sel_muxr,
+   output logic       divBusy
+   );
+
+   typedef enum       logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
+                                   S10, S11, S12, S13, S14, S15, S16, S17, S18, S19,
+                                   S20, S21, S22, S23, S24, S25, S26, S27, S28, S29,
+                                   S30} statetype;
+
+   statetype current_state, next_state;
+
+   // State register with synchronous reset to S0.
+   always_ff @(posedge clk)
+     begin
+        if (reset == 1'b1)
+          current_state <= S0;
+        else
+          current_state <= next_state;
+     end
+
+   // Next-state and output decode.
+   always_comb
+     begin
+        // Defaults: everything deasserted, return to S0. Each state below
+        // overrides only the signals it drives to a non-default value.
+        done = 1'b0;
+        divBusy = 1'b0;
+        load_rega = 1'b0;
+        load_regb = 1'b0;
+        load_regc = 1'b0;
+        load_regd = 1'b0;
+        load_regr = 1'b0;
+        load_regs = 1'b0;
+        sel_muxa = 3'b000;
+        sel_muxb = 3'b000;
+        sel_muxr = 1'b0;
+        next_state = S0;
+
+        case(current_state)
+          S0:  // iteration 0 (idle until start)
+            begin
+               // Explicit comparisons keep an unknown start/op_type in the
+               // idle defaults, as in the original if/else chain.
+               if (start==1'b1 & op_type==1'b0)
+                 begin  // divide
+                    divBusy = 1'b1;
+                    load_regb = 1'b1;
+                    sel_muxa = 3'b001;
+                    sel_muxb = 3'b001;
+                    next_state = S1;
+                 end
+               else if (start==1'b1 & op_type==1'b1)
+                 begin  // square root
+                    divBusy = 1'b1;
+                    load_regb = 1'b1;
+                    sel_muxa = 3'b010;
+                    next_state = S13;
+                 end
+            end
+
+          // ---------------- divide path ----------------
+          S1:
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxa = 3'b010;
+               next_state = S2;
+            end
+          S2: // iteration 1
+            begin
+               divBusy = 1'b1;
+               load_regb = 1'b1;
+               sel_muxa = 3'b011;
+               sel_muxb = 3'b011;
+               next_state = S3;
+            end
+          S3:
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxb = 3'b010;
+               next_state = S4;
+            end
+          S4: // iteration 2
+            begin
+               divBusy = 1'b1;
+               load_regb = 1'b1;
+               sel_muxa = 3'b011;
+               sel_muxb = 3'b011;
+               next_state = S5;
+            end
+          S5:
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxb = 3'b010;
+               next_state = S6;
+            end
+          S6: // iteration 3
+            begin
+               divBusy = 1'b1;
+               load_regb = 1'b1;
+               sel_muxa = 3'b011;
+               sel_muxb = 3'b011;
+               next_state = S8;  // NOTE(review): bypasses S7
+            end
+          S7: // unreachable from any other state as written
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxb = 3'b010;
+               next_state = S8;
+            end
+          S8: // q,qm,qp
+            begin
+               divBusy = 1'b1;
+               load_regs = 1'b1;
+               next_state = S9;
+            end
+          S9:  // rem
+            begin
+               divBusy = 1'b1;
+               load_regr = 1'b1;
+               sel_muxr = 1'b1;
+               next_state = S10;
+            end
+          S10:  // done
+            begin
+               done = 1'b1;
+               next_state = S11;
+            end
+          S11:  // idle cycle after done, then back to S0
+            begin
+               next_state = S0;
+            end
+
+          // ---------------- square-root path ----------------
+          S13:  // start of sqrt path
+            begin
+               divBusy = 1'b1;
+               load_regd = 1'b1;
+               sel_muxa = 3'b010;
+               sel_muxb = 3'b001;
+               next_state = S14;
+            end
+          S14:
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxa = 3'b001;
+               sel_muxb = 3'b100;
+               next_state = S15;
+            end
+          S15:  // iteration 1
+            begin
+               divBusy = 1'b1;
+               load_regb = 1'b1;
+               sel_muxa = 3'b011;
+               sel_muxb = 3'b011;
+               next_state = S16;
+            end
+          S16:
+            begin
+               divBusy = 1'b1;
+               load_regd = 1'b1;
+               sel_muxb = 3'b011;
+               next_state = S17;
+            end
+          S17:
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxa = 3'b100;
+               sel_muxb = 3'b010;
+               next_state = S18;
+            end
+          S18:  // iteration 2
+            begin
+               divBusy = 1'b1;
+               load_regb = 1'b1;
+               sel_muxa = 3'b011;
+               sel_muxb = 3'b011;
+               next_state = S19;
+            end
+          S19:
+            begin
+               divBusy = 1'b1;
+               load_regd = 1'b1;
+               sel_muxb = 3'b011;
+               next_state = S20;
+            end
+          S20:
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxa = 3'b100;
+               sel_muxb = 3'b010;
+               next_state = S21;
+            end
+          S21:  // iteration 3
+            begin
+               divBusy = 1'b1;
+               load_regb = 1'b1;
+               sel_muxa = 3'b011;
+               sel_muxb = 3'b011;
+               next_state = S22;
+            end
+          S22:
+            begin
+               divBusy = 1'b1;
+               load_regd = 1'b1;
+               sel_muxb = 3'b011;
+               next_state = S23;
+            end
+          S23:
+            begin
+               divBusy = 1'b1;
+               load_rega = 1'b1;
+               load_regc = 1'b1;
+               sel_muxa = 3'b100;
+               sel_muxb = 3'b010;
+               next_state = S24;
+            end
+          S24: // q,qm,qp
+            begin
+               divBusy = 1'b1;
+               load_regs = 1'b1;
+               next_state = S25;
+            end
+          S25:  // rem
+            begin
+               divBusy = 1'b1;
+               load_regr = 1'b1;
+               sel_muxa = 3'b011;
+               sel_muxb = 3'b110;
+               sel_muxr = 1'b1;
+               next_state = S26;
+            end
+          S26:  // done
+            begin
+               done = 1'b1;
+               next_state = S27;
+            end
+          S27:  // idle cycle after done, then back to S0
+            begin
+               next_state = S0;
+            end
+
+          default:  // S12, S28..S30 and unknown states: defaults apply
+            begin
+               next_state = S0;
+            end
+        endcase // case(current_state)
+     end // always_comb
+
+endmodule // fsm_fpdiv
--- a/pipelined/src/fpu/fsm_fpdiv_pipe.sv
+++ b/pipelined/src/fpu/fsm_fpdiv_pipe.sv
--- a/pipelined/src/fpu/lzd_denorm.sv
+++ b/pipelined/src/fpu/lzd_denorm.sv
@ -0,0 +1,171 @@
+// module lz2 (P, V, B0, B1);
+
+//    input B0;
+//    input B1;
+ 
+//    output P;
+//    output V;
+
+//    assign V = B0 | B1;
+//    assign P = B0 & ~B1;
+   
+// endmodule // lz2
+
+// Note: This module is not made out of two lz2's - why not? (MJS)
+
+// module lz4 (ZP, ZV, B0, B1, V0, V1);
+   
+//    input B0;
+//    input B1;
+//    input V0;
+//    input V1;
+
+//    output [1:0] ZP;
+//    output 	ZV;
+
+//    assign ZP[0] = V0 ? B0 : B1;
+//    assign ZP[1] = ~V0;
+//    assign ZV = V0 | V1;
+
+// endmodule // lz4
+
+// // Note: This module is not made out of two lz4's - why not? (MJS)
+
+// module lz8 (ZP, ZV, B);
+   
+//    input [7:0] B;
+
+//    wire        s1p0;
+//    wire        s1v0;
+//    wire        s1p1;
+//    wire        s1v1;
+//    wire        s2p0;
+//    wire        s2v0;
+//    wire        s2p1;
+//    wire        s2v1;
+//    wire [1:0]  ZPa;
+//    wire [1:0]  ZPb;
+//    wire        ZVa;
+//    wire        ZVb;
+   
+//    output [2:0] ZP;
+//    output       ZV;
+   
+//    lz2 l1(s1p0, s1v0, B[2], B[3]);
+//    lz2 l2(s1p1, s1v1, B[0], B[1]);
+//    lz4 l3(ZPa, ZVa, s1p0, s1p1, s1v0, s1v1);
+
+//    lz2 l4(s2p0, s2v0, B[6], B[7]);
+//    lz2 l5(s2p1, s2v1, B[4], B[5]);
+//    lz4 l6(ZPb, ZVb, s2p0, s2p1, s2v0, s2v1);
+
+//    assign ZP[1:0] = ZVb ? ZPb : ZPa;
+//    assign ZP[2]   = ~ZVb;
+//    assign ZV = ZVa | ZVb;
+
+// endmodule // lz8
+
+// module lz16 (ZP, ZV, B);
+
+//    input [15:0] B;
+
+//    wire [2:0] 	ZPa;
+//    wire [2:0] 	ZPb;
+//    wire 	ZVa;
+//    wire 	ZVb;   
+
+//    output [3:0] ZP;
+//    output 	ZV;
+
+//    lz8 l1(ZPa, ZVa, B[7:0]);
+//    lz8 l2(ZPb, ZVb, B[15:8]);
+
+//    assign ZP[2:0] = ZVb ? ZPb : ZPa;
+//    assign ZP[3]   = ~ZVb;
+//    assign ZV = ZVa | ZVb;
+
+// endmodule // lz16
+
+// module lz32 (ZP, ZV, B);
+
+//    input [31:0] B;
+
+//    wire [3:0] 	ZPa;
+//    wire [3:0] 	ZPb;
+//    wire 	ZVa;
+//    wire 	ZVb;
+
+//    output [4:0] ZP;
+//    output 	ZV;
+
+//    lz16 l1(ZPa, ZVa, B[15:0]);
+//    lz16 l2(ZPb, ZVb, B[31:16]);
+
+//    assign ZP[3:0] = ZVb ? ZPb : ZPa;
+//    assign ZP[4]   = ~ZVb;
+//    assign ZV = ZVa | ZVb;
+
+// endmodule // lz32
+
+// // This module returns the number of leading zeros ZP in the 64-bit 
+// // number B. If there are no ones in B, then ZP and ZV are both 0.
+
+// module lz64 (ZP, ZV, B);
+
+//    input [63:0] B;
+
+//    wire [4:0] 	ZPa;
+//    wire [4:0] 	ZPb;
+//    wire 	ZVa;
+//    wire 	ZVb;   
+
+//    output [5:0] ZP;
+//    output 	ZV;
+
+//    lz32 l1(ZPa, ZVa, B[31:0]);
+//    lz32 l2(ZPb, ZVb, B[63:32]);
+
+//    assign ZV = ZVa | ZVb;
+//    assign ZP[4:0] = (ZVb ? ZPb : ZPa) & {5{ZV}};
+//    assign ZP[5]   = ~ZVb & ZV;
+
+// endmodule // lz64
+
+// This module returns the number of leading zeros ZP in the 52-bit
+// number B. ZV is the OR of the sub-detectors' valid outputs.
+//
+// B is examined in three slices, most significant first:
+//   B[51:20] -> lz32, B[19:4] -> lz16, B[3:0] -> two lz2 cells plus an lz4.
+// ZP is assembled from the first slice whose valid flag is set:
+//   ZV_32 set          : ZP = {1'b0, ZP_32}
+//   else ZV_16 set     : ZP = {2'b10, ZP_16}          (32 + ZP_16)
+//   else               : ZP = {2'b11, 2'b00, ZP_4}    (48 + ZP_4)
+//
+// NOTE(review): ZP is not forced to 0 when no slice is valid, since
+// ZP[5] = ~ZV_32 and ZP[4] = ~ZV_16 in that case. Consumers should qualify
+// ZP with ZV.
+//
+// NOTE(review): the lz2 operand order below assumes lz2 has ports
+// (P, V, B0, B1) with P = B0 & ~B1, as in the commented-out reference
+// definition earlier in this file, where lz8 passes the lower bit first
+// (e.g. lz2 l1(s1p0, s1v0, B[2], B[3])). The previous upper-bit-first order
+// gave P = 1 for a pair of 2'b10 and P = 0 for 2'b01, the reverse of a
+// leading-zero count. Confirm against the active lz2 definition.
+
+module lz52 (ZP, ZV, B);
+
+   input [51:0] B;
+
+   // Leading-zero position and valid flag of each slice
+   wire  [4:0]  ZP_32;
+   wire  [3:0]  ZP_16;
+   wire  [1:0]  ZP_4;
+   wire         ZV_32;
+   wire         ZV_16;
+   wire         ZV_4;
+
+   // Per-pair results for the low nibble: _1 is B[3:2], _2 is B[1:0]
+   wire         ZP_2_1;
+   wire         ZP_2_2;
+   wire         ZV_2_1;
+   wire         ZV_2_2;
+
+   output [5:0] ZP;
+   output       ZV;
+
+   lz32 l1 (ZP_32, ZV_32, B[51:20]);
+   lz16 l2 (ZP_16, ZV_16, B[19:4]);
+
+   // Lower bit of each pair goes to B0, upper bit to B1.
+   lz2 l3_1 (ZP_2_1, ZV_2_1, B[2], B[3]);
+   lz2 l3_2 (ZP_2_2, ZV_2_2, B[0], B[1]);
+   lz4 l3_final (ZP_4, ZV_4, ZP_2_1, ZP_2_2, ZV_2_1, ZV_2_2);
+
+   assign ZV = ZV_32 | ZV_16 | ZV_4;
+   assign ZP[5] = ~ZV_32;
+   assign ZP[4] = ZV_32 ? ZP_32[4] : ~ZV_16;
+   assign ZP[3:2] = ZV_32 ? ZP_32[3:2] : (ZV_16 ? ZP_16[3:2] : 2'b0);
+   assign ZP[1:0] = ZV_32 ? ZP_32[1:0] : (ZV_16 ? ZP_16[1:0] : ZP_4);
+
+endmodule // lz52
+
+
--- a/pipelined/src/fpu/redundantmul.sv
+++ b/pipelined/src/fpu/redundantmul.sv
@ -0,0 +1,58 @@
+///////////////////////////////////////////
+// redundantmul.sv
+//
+// Written: David_Harris@hmc.edu and ssanghai@hm.edu 10/11/2021
+// Modified: 
+//
+// Purpose: multiplier with output in redundant carry-sum form
+//          This can be faster than a multiplier that requires a final adder to obtain the nonredundant answer.
+//          The module has several implementations controlled by the DESIGN_COMPILER flag.
+//          When DESIGN_COMPILER = 1, use the Synopsys DesignWare DW02_multp block.  This will give highest quality results
+//                                    but doesn't work in simulation or when using different tools
+//          When DESIGN_COMPILER = 2, use the Wally mult_cs block with Radix 2 Booth encoding and a Wallace Tree
+//                                    This simulates and synthesizes, but quality of results is lower than DesignWare
+//                                    (Note: this file only tests DESIGN_COMPILER == 1, so a value of 2 takes the fallback below.)
+//          Otherwise, just use a nonredundant multiplier and set one word to 0.  This is best for FPGAs, which have
+//                                    block multipliers, and also simulates fastest.
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// redundantmul: WIDTH x WIDTH multiplier whose result is delivered as two
+// 2*WIDTH-bit words, out0 and out1.
+//   `DESIGN_COMPILER == 1 : instantiate DW02_multp with tc tied low and keep
+//                           the low 2*WIDTH bits of each of its two
+//                           (2*WIDTH+2)-bit outputs.
+//   any other value       : out0 carries a * b and out1 is all zeros.
+module redundantmul #(parameter WIDTH = 8) (
+  input  logic [WIDTH-1:0]   a,
+  input  logic [WIDTH-1:0]   b,
+  output logic [2*WIDTH-1:0] out0,
+  output logic [2*WIDTH-1:0] out1
+);
+
+  generate
+    if (`DESIGN_COMPILER == 1) begin : mul
+      // The DesignWare block is configured with an output width of 2*WIDTH+2,
+      // two bits wider than the module ports; only the low bits are forwarded.
+      logic [2*WIDTH+1:0] dw_word0;
+      logic [2*WIDTH+1:0] dw_word1;
+
+      DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul (
+        .a   (a),
+        .b   (b),
+        .tc  (1'b0),
+        .out0(dw_word0),
+        .out1(dw_word1)
+      );
+
+      assign out0 = dw_word0[2*WIDTH-1:0];
+      assign out1 = dw_word1[2*WIDTH-1:0];
+    end else begin : mul
+      // Force a nonredundant multiplier: the whole product goes in out0.
+      // This simulates properly and is also appropriate for FPGAs.
+      assign out1 = '0;
+      assign out0 = a * b;
+    end
+  endgenerate
+endmodule
+
+
--- a/pipelined/src/fpu/rounder_denorm.sv
+++ b/pipelined/src/fpu/rounder_denorm.sv
@ -0,0 +1,259 @@
+// The rounder takes as inputs a 64-bit value to be rounded, A, the 
+// exponent of the value to be rounded, the sign of the final result, Sign, 
+// the precision of the results, P, and the three-bit rounding mode, rm. 
+// It produces a rounded 52-bit result, Z, the exponent of the rounded 
+// result, Z_exp, and a flag that indicates if the result was rounded,
+// Inexact. With rm[2] = 0, rm[1:0] has the following values.
+//	rm[1:0]		Mode
+//      00 		round-to-nearest-even
+//	01 		round-toward-zero
+//      10 		round-toward-plus infinity
+//      11  		round-toward-minus infinity
+// With rm[2] = 1, one is added whenever the round bit R is one.
+// The rounding algorithm determines if '1' should be added to the 
+// truncated significand result, based on three significant bits 
+// (least (L), round (R) and sticky (S)), the rounding mode (rm)
+// and the sign of the final result (Sign). Visually, L and R appear as
+//    xxxxxL,Rxxxxxxx
+// where , denotes the rounding boundary. S is the logical OR of all the
+// bits to the right of R. 
+ 
+module rounder (
+   input logic  [2:0]  rm,                // rounding mode; rm[2] = 1 selects "add one whenever R is set" (see add_one)
+   input logic         P,                 // precision select: 1 uses the single-precision bit positions/packing, 0 double
+   input logic         OvEn,              // overflow trap enable (see Adj_exp / VeryLarge)
+   input logic         UnEn,              // underflow trap enable (here it only affects Inexact)
+   input logic         exp_valid,         // when 0 the result is forced to zero (see Rzero)
+   input logic [3:0] 	 sel_inv,           // special-case select; [2:0] = 000 valid, 001 NaN, 01x infinite; [3] see UnderFlow/Rsign
+   input logic	 Invalid,                  // passed straight through into Flags
+   input logic	 DenormIn,                 // selects the denormalized result path (Texp, Rsign, Rexp_denorm, Result)
+   input logic         Asign,             // sign of the value being rounded
+   input logic [10:0]  Aexp,              // exponent of the value being rounded
+   input logic [5:0] 	 norm_shift,        // shift amount subtracted from Aexp when forming Texp
+   input logic [63:0]  A,                 // value to be rounded
+   input logic [10:0]  exponent_postsum,  // used directly as Texp when DenormIn is set
+   input logic 	 A_Norm,                  // used only in the DenormIn sign/exponent selection
+   input logic 	 B_Norm,                  // used only in the DenormIn sign/exponent selection
+   input logic [11:0]  exp_A_unmodified,  // only bit 11 is read in this module
+   input logic [11:0]  exp_B_unmodified,  // only bit 11 is read in this module
+   input logic 	 normal_overflow,         // adds one to Texp (Texp_addone) on the DenormIn path
+   input logic 	 normal_underflow,        // subtracts one from Texp (Texp_subone) on the DenormIn path
+   input logic 	 swap,                    // not referenced inside this module
+   input logic [2:0]	 op_type,           // only op_type[1:0] are read in this module
+   input logic [63:0]  sum,               // only sum[63] is read in this module
+   
+   output logic [63:0] Result,            // packed result (upper 32 bits all ones when P selects single precision)
+   output logic 	 DenormIO,                // DenormIn | UnderFlow
+   output logic [4:0]  Flags              // {UnderFlow, 1'b0, OverFlow, Invalid, Inexact}
+);
+   
+   // Summary: rounds A according to rm/P/Asign, derives the result exponent
+   // and the overflow/underflow conditions, and packs sign, exponent and
+   // mantissa into Result.  When DenormIn is set, the exponent and sign come
+   // from the separate denormal selection logic and the mantissa is A[51:0].
+
+   wire          Rsign;
+   wire 	 Sticky_out;            // declared but not used in this module
+   wire [51:0]	 ShiftMant;
+   wire [63:0]   ShiftMant_64;          // declared but not used in this module
+   wire [10:0] 	 Rexp;
+   wire [10:0]   Rexp_denorm;
+   wire [11:0] 	 Texp;			//Parallelized for denorm exponent
+   wire [11:0]   Texp_addone;		//results
+   wire [11:0]   Texp_subone;
+   wire [51:0] 	 Rmant;
+   wire [51:0] 	 Tmant;
+   wire          Rzero;
+   wire          VSS = 1'b0;
+   wire          VDD = 1'b1;
+   wire [51:0] 	 B;			// Value used to add the "ones"
+   wire [11:0]   B_12_overflow;		// Value used to add one to exponent
+   wire [11:0]   B_12_underflow;	// Value used to subtract one from exponent
+   wire		 S_SP;			// Single precision sticky bit
+   wire		 S_DP;			// Double precision sticky bit
+   wire		 S;			// Actual sticky bit
+   wire		 R;			// Round bit
+   wire		 L;			// Least significant bit
+   wire		 add_one;		// '1' if one should be added
+   wire		 UnFlow_SP, UnFlow_DP, UnderFlow; 
+   wire		 OvFlow_SP, OvFlow_DP, OverFlow;		
+   wire		 Inexact;
+   wire		 Round_zero;
+   wire		 Infinite;
+   wire		 VeryLarge;
+   wire		 Largest;
+   wire		 Adj_exp;
+   wire		 Valid;
+   wire		 NaN;
+   wire		 Cout;
+   wire 	 Cout_overflow;
+   wire		 Texp_l7z;
+   wire		 Texp_l7o;
+
+   // Determine the sticky bits for double and single precision
+   // (S_DP ORs A[9:0]; S_SP additionally ORs A[38:10]).
+   assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0];
+   assign S_SP = S_DP |A[38]|A[37]|A[36]|A[35]|A[34]|A[33]|A[32]|A[31]|A[30]|
+                 A[29]|A[28]|A[27]|A[26]|A[25]|A[24]|A[23]|A[22]|A[21]|A[20]|
+                 A[19]|A[18]|A[17]|A[16]|A[15]|A[14]|A[13]|A[12]|A[11]|A[10];
+
+   // Set the least (L), round (R), and sticky (S) bits based on
+   // the precision. 
+   assign {L, R, S} = P ? {A[40],A[39],S_SP} : {A[11],A[10],S_DP};
+
+   // Add one if ((the rounding mode is round-to-nearest) and (R is one) and
+   // (S or L is one)) or ((the rounding mode is towards plus or minus 
+   // infinity (rm[1] = 1)) and (the sign and rm[0] are the same) and 
+   // (R or S is one)).  Both of those cases require rm[2] = 0; when
+   // rm[2] = 1, one is added whenever R is one.
+
+   assign add_one = ~rm[2] & ((~rm[1]&~rm[0]&R&(L|S)) | (rm[1]&(Asign^~rm[0])&(R|S))) | (rm[2] & R);
+
+   // Add one using a 52-bit adder. The one is added to the LSB B[0] for
+   // double precision or to B[29] for single precision. 
+   // This could be simplified by using a specialized adder.
+   // The current adder is actually 64-bits. The leading one 
+   // for normalized results is not included in the addition.
+   assign B = {{22{VSS}}, add_one&P, {28{VSS}}, add_one&~P};
+   assign B_12_overflow = {8'h0, 3'b0, normal_overflow};
+   assign B_12_underflow = {8'h0, 3'b0, normal_underflow};
+
+   cla52 add1(Tmant, Cout, A[62:11], B); //***adder
+
+   cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder
+
+   cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder
+
+   // Now that rounding is done, we compute the final exponent
+   // and test for special cases. 
+
+   // Compute the value of the exponent by subtracting the shift 
+   // value from the previous exponent and then adding 2 + cout. 
+   // If needed this could be optimized to use a specialized 
+   // adder.  With DenormIn set, exponent_postsum is used unchanged.
+
+   assign Texp = DenormIn ? ({1'b0, exponent_postsum}) : ({VSS, Aexp} - {{6{VSS}}, norm_shift} +{{10{VSS}}, VDD, Cout});   
+   
+   // Overflow only occurs for double precision, if Texp[10] to Texp[0] are 
+   // all ones. To encourage sharing with single precision overflow detection,
+   // the lower 7 bits are tested separately. 
+   assign Texp_l7o  = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
+   assign OvFlow_DP = Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o;
+
+   // Overflow occurs for single precision if (Texp[10] is one)  and 
+   // ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0] 
+   // are all ones. 
+   assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);
+
+   // Underflow occurs for double precision if (Texp[11] is one)  or Texp[10] to 
+   // Texp[0] are all zeros. 
+   assign Texp_l7z  = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
+   assign UnFlow_DP = Texp[11] | ~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
+
+   // Underflow occurs for single precision if (Texp[10] is zero)  and 
+   // ((Texp[9] or Texp[8] or Texp[7]) is zero or Texp[6] to Texp[0] are
+   // all zeros). 
+   assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z));
+   
+   // Set the overflow and underflow flags. They should not be set if
+   // the input was infinite or NaN or the output of the adder is zero.
+   // sel_inv[2:0] = 000 : Valid
+   // sel_inv[2:0] = 001 : NaN
+   // UnderFlow is also raised when sel_inv[3] is set and Aexp equals
+   // 11'b01110000000.
+   assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
+   assign NaN   = ~sel_inv[2]&~sel_inv[1]& sel_inv[0];
+   assign UnderFlow = ((P & UnFlow_SP | UnFlow_DP)&Valid&exp_valid) |
+		      (~Aexp[10]&Aexp[9]&Aexp[8]&Aexp[7]&~Aexp[6]
+		       &~Aexp[5]&~Aexp[4]&~Aexp[3]&~Aexp[2]
+		       &~Aexp[1]&~Aexp[0]&sel_inv[3]);
+   assign OverFlow  = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid;
+
+   // The DenormIO is set if underflow has occurred or if there was a
+   // denormalized input. 
+   assign DenormIO = DenormIn | UnderFlow;
+
+   // The final result is Inexact if any rounding occurred ((i.e., R or S 
+   // is one), or (if the result overflows ) or (if the result underflows and the 
+   // underflow trap is not enabled)) and (value of the result was not previously set 
+   // by an exception case). 
+   assign Inexact = (R|S|OverFlow|(UnderFlow&~UnEn))&Valid;
+
+   // Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0, 
+   // Invalid. 
+   // NOTE(review): the concatenation below is ordered
+   // {UnderFlow, 0, OverFlow, Invalid, Inexact}, which does not match the
+   // order listed above - confirm which bit order the consumer expects.
+   assign Flags = {UnderFlow, VSS, OverFlow, Invalid, Inexact};
+
+   // Determine the final result. 
+
+   // The sign of the final result is one if the result is not zero and
+   // the sign of A is one, or if the result is zero and the rounding 
+   // mode is round-to-minus infinity. The final result is zero, if exp_valid
+   // is zero. If underflow occurs, then the result is set to zero.
+   //   
+   // For Zero (goes equally for subtraction although 
+   // signs may alter operands sign):
+   // -0 + -0 = -0 (always)
+   // +0 + +0 = +0 (always)
+   // -0 + +0 = +0 (for RN, RZ, RU) 
+   // -0 + +0 = -0 (for RD) 
+   //
+   // With DenormIn set, Rsign is Asign, optionally inverted depending on
+   // op_type[1:0], sum[63], A_Norm/B_Norm, bit 11 of the unmodified
+   // exponents and normal_underflow.  With sel_inv[3] set (and DenormIn
+   // clear), Rsign is simply Asign.
+   assign Rzero = ~exp_valid | UnderFlow;
+   assign Rsign = DenormIn ?
+		  ( ~(op_type[1] | op_type[0]) ? 
+		  ( (sum[63] & (A_Norm | B_Norm) & (exp_A_unmodified[11] ^ exp_B_unmodified[11])) ?
+		  ~Asign : Asign) 
+   		  : ( ((A_Norm ^ B_Norm) & (exp_A_unmodified[11] ~^ exp_B_unmodified[11])) ?
+		  (normal_underflow ? ~Asign : Asign) : Asign)
+		  ) 
+		  : ( ((Asign&exp_valid | 
+     	          (sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] |
+	          sel_inv[2]&sel_inv[1]&~sel_inv[0] |		  
+	          ~exp_valid&rm[1]&rm[0]&~sel_inv[2] | 
+	          UnderFlow&rm[1]&rm[0])) & ~sel_inv[3]) |
+		  (Asign & sel_inv[3]) );
+   
+   // The exponent of the final result is zero if the final result is 
+   // zero or a denorm, all ones if the final result is NaN or Infinite
+   // or overflow occurred and the magnitude of the number is 
+   // not rounded toward zero, and all ones with an LSB of zero
+   // if overflow occurred and the magnitude of the number is 
+   // rounded toward zero. If the result is single precision, 
+   // Texp[7] should be inverted. When the Overflow trap is enabled (OvEn = 1)
+   // and overflow occurs and the operation is not conversion, bits 10 and 9 are 
+   // inverted for double precision, and bits 7 and 6 are inverted for single precision. 
+   assign Round_zero = ~rm[1]&rm[0] | ~Asign&rm[0] | Asign&rm[1]&~rm[0];
+   assign VeryLarge = OverFlow & ~OvEn;
+   assign Infinite   = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]);
+   assign Largest = VeryLarge & Round_zero;
+   assign Adj_exp = OverFlow & OvEn;
+   assign Rexp[10:1] = ({10{~Valid}} | 
+			{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], 
+			 (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | 
+		        {10{VeryLarge}})&{10{~Rzero | NaN}};
+   assign Rexp[0]    = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
+   
+   // The denormalized rounded exponent uses the overflow/underflow values
+   // computed in the fpadd component to round the exponent up or down 
+   // Depending on the operation and the signs of the original operands,
+   // underflow may or may not be needed to round.
+   // When DenormIn is clear, Rexp_denorm is just Rexp.
+   assign Rexp_denorm = DenormIn ? 
+			((~op_type[1] & op_type[0]) ? 
+				( ((A_Norm != B_Norm) & (exp_A_unmodified[11] == exp_B_unmodified[11])) ? 
+					( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) 
+					: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) 
+				: ( ((A_Norm != B_Norm) & (exp_A_unmodified[11] != exp_B_unmodified[11])) ?	
+					( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) 
+					: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) 
+				) : 
+			Rexp; //KEP used to be all of exp_A_unmodified
+
+   // If the result is zero or infinity, the mantissa is all zeros. 
+   // If the result is NaN, the mantissa is 10...0
+   // If the result is the largest floating point number, the mantissa
+   // is all ones. Otherwise, the mantissa is not changed. 
+   // If operation is denormalized, take the mantissa directly from
+   // its normalized value. 
+   assign Rmant[51] = Largest | NaN | (Tmant[51]&~Infinite&~Rzero);
+   assign Rmant[50:0] = {51{Largest}} | (Tmant[50:0]&{51{~Infinite&Valid&~Rzero}});
+
+   // Mantissa used on the DenormIn path: the low 52 bits of A, unrounded.
+   assign ShiftMant = A[51:0];
+
+   // For single precision, the 8 least significant bits of the exponent
+   // and 23 most significant bits of the mantissa contain bits used 
+   // for the final result. A double precision result is returned if 
+   // overflow has occurred, the overflow trap is enabled, and a conversion
+   // is being performed. 
+   // The single-precision result fills the upper 32 bits with ones.
+
+   assign Result = DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : (P ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]}
+	           : {Rsign, Rexp, Rmant});
+
+endmodule // rounder
+
--- a/pipelined/src/fpu/rounder_div.sv
+++ b/pipelined/src/fpu/rounder_div.sv
@ -0,0 +1,212 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Floating point divider/square root rounder unit (Goldschmidt)
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module rounder_div (
+    input logic [1:0] 	rm,          // rounding mode (see mux_mant and Round_zero)
+    input logic 	P,           // precision select: 1 uses the single-precision guard bit and packing, 0 double
+    input logic 	OvEn,        // overflow trap enable (see Adj_exp / VeryLarge / OvCon)
+    input logic 	UnEn,        // underflow trap enable (here it only affects Inexact)
+    input logic [12:0] 	exp_diff,    // exponent before the normalization/rounding adjustment (see Texp)
+    input logic [2:0] 	sel_inv,     // special-case select: 000 valid, 111 NaN, 011 forces zero, bit 1 forces infinite
+    input logic 	Invalid,     // passed straight through into Flags
+    input logic 	SignR,       // sign of the result
+    input logic [63:0] 	Float1,      // first operand; source of NaN payload/sign unless only Y is NaN
+    input logic [63:0] 	Float2,      // second operand; source of NaN payload/sign when only Y is NaN
+    input logic 	XNaNQ,
+    input logic 	YNaNQ,
+    input logic 	XZeroQ,
+    input logic 	YZeroQ, 
+    input logic 	XInfQ,
+    input logic 	YInfQ,
+    input logic 	op_type,     // presumably 0 = divide, 1 = square root (Div0 requires op_type = 0) - TODO confirm
+    input logic [59:0] 	q1,          // quotient candidates used when q1[59] is 1 ...
+    input logic [59:0] 	qm1,
+    input logic [59:0] 	qp1,
+    input logic [59:0] 	q0,          // ... and the alternates used when q1[59] is 0
+    input logic [59:0] 	qm0,
+    input logic [59:0] 	qp0, 
+    input logic [119:0] regr_out,    // remainder; only its zero-ness and bit 119 are examined
+   
+    output logic [63:0] Result,      // packed result (upper 32 bits all ones for single precision)
+    output logic [4:0] 	Flags        // {Inexact, UnderFlow, OverFlow, Div0, Invalid}
+    );
+      
+   logic 		Rsign;
+   logic [10:0] 	Rexp;
+   logic [12:0] 	Texp;
+   logic [51:0] 	Rmant;
+   logic [59:0] 	Tmant;
+   logic [51:0] 	Smant;   
+   logic 		Rzero;
+   logic 	       Gdp, Gsp, G;
+   logic 	       UnFlow_SP, UnFlow_DP, UnderFlow; 
+   logic 	       OvFlow_SP, OvFlow_DP, OverFlow;		
+   logic 	       Inexact;
+   logic 	       Round_zero;
+   logic 	       Infinite;
+   logic 	       VeryLarge;
+   logic 	       Largest;
+   logic 	       Div0;      
+   logic 	       Adj_exp;
+   logic 	       Valid;
+   logic 	       NaN;
+   logic 	       Texp_l7z;
+   logic 	       Texp_l7o;
+   logic 	       OvCon;
+   logic 	       zero_rem;
+   logic [1:0] 	       mux_mant;
+   logic 	       sign_rem;
+   logic [59:0]        q, qm, qp;
+   logic 	       exp_ovf;
+
+   logic [50:0]        NaN_out;
+   logic 	       NaN_Sign_out;   
+   logic 	       Sign_out;     
+
+   // Remainder = 0?
+   assign zero_rem = ~(|regr_out);
+   // Remainder Sign (sign_rem is 1 when the remainder MSB is 0)
+   assign sign_rem = ~regr_out[119];
+   // choose correct Guard bit [1,2) or [0,1)
+   assign Gdp = q1[59] ? q1[6] : q0[6];
+   assign Gsp = q1[59] ? q1[35] : q0[35];
+   assign G = P ? Gsp : Gdp;   
+   // Selection of Rounding (from logic/switching)
+   // mux_mant drives the select of mx4 below, which picks among q, qm and qp.
+   assign mux_mant[1] = (SignR&rm[1]&rm[0]&G) | (!SignR&rm[1]&!rm[0]&G) | 
+			(!rm[1]&!rm[0]&G&!sign_rem) | 
+			(SignR&rm[1]&rm[0]&!zero_rem&!sign_rem) | 
+			(!SignR&rm[1]&!rm[0]&!zero_rem&!sign_rem);
+   assign mux_mant[0] = (!SignR&rm[0]&!G&!zero_rem&sign_rem) | 
+			(!rm[1]&rm[0]&!G&!zero_rem&sign_rem) | 
+			(SignR&rm[1]&!rm[0]&!G&!zero_rem&sign_rem);
+   
+   // Which Q?  (q1[59] is the select for all three muxes)
+   mux2 #(60) mx1 (q0, q1, q1[59], q);
+   mux2 #(60) mx2 (qm0, qm1, q1[59], qm);   
+   mux2 #(60) mx3 (qp0, qp1, q1[59], qp);
+   // Choose Q, Q+1, Q-1
+   mux3 #(60) mx4 (q, qm, qp, mux_mant, Tmant);
+   assign Smant = Tmant[58:7];
+   // Compute the value of the exponent
+   //   exponent is modified if we choose:
+   //   1.) we choose any qm0, qp0, q0 (since we shift mant)
+   //   2.) we choose qp and we overflow (for RU)
+   // exp_ovf is the OR of qp[58:36], plus qp[35:7] when P = 0.
+   assign exp_ovf = |{qp[58:36], (qp[35:7] & {29{~P}})};
+   assign Texp = exp_diff - {{12{1'b0}}, ~q1[59]} + {{12{1'b0}}, mux_mant[1]&qp1[59]&~exp_ovf};
+   
+   // Overflow occurs for double precision if (Texp[12] is zero and Texp[11]
+   // is one) or if Texp[10] to Texp[0] are all ones. To encourage sharing 
+   // with single precision overflow detection, the lower 7 bits are tested 
+   // separately. 
+   assign Texp_l7o  = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
+   assign OvFlow_DP = (~Texp[12]&Texp[11]) | (Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o);
+
+   // Overflow occurs for single precision if (Texp[10] is one)  and 
+   // ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0] 
+   // are all ones. 
+   assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);
+
+   // Underflow occurs for double precision if (Texp[12] and Texp[11] are 
+   // both one) or Texp[11] to Texp[0] are all zeros. 
+   assign Texp_l7z  = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
+   assign UnFlow_DP = (Texp[12]&Texp[11]) | ~Texp[11]&~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
+   
+   // Underflow occurs for single precision if (Texp[10] is zero)  and 
+   // ((Texp[9] or Texp[8] or Texp[7]) is zero or Texp[6] to Texp[0] are
+   // all zeros). 
+   assign UnFlow_SP = ~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z);
+   
+   // Set the overflow and underflow flags. They should not be set if
+   // the input was infinite or NaN or the output of the adder is zero.
+   // sel_inv = 000 : Valid
+   // sel_inv = 111 : NaN
+   // Div0 is raised when Y is zero, X is not zero, op_type is 0 and the
+   // result is not NaN.
+   assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
+   assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0]; 
+   assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
+   assign OverFlow  = (P & OvFlow_SP | OvFlow_DP) & Valid;
+   assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;   
+
+   // The final result is Inexact if any rounding occurred ((i.e., the guard 
+   // bit G is one or the remainder is nonzero), or (if the result overflows ) 
+   // or (if the result underflows and the underflow trap is not enabled)) 
+   // and (value of the result was not previously set by an exception case). 
+   assign Inexact = (G|~zero_rem|OverFlow|(UnderFlow&~UnEn))&Valid;
+
+   // Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0, 
+   // Invalid. 
+   assign Flags = {Inexact, UnderFlow, OverFlow, Div0, Invalid};
+
+   // Determine sign
+   // Rzero (zero result) is set on underflow or when sel_inv = 011.
+   assign Rzero = UnderFlow | (~sel_inv[2]&sel_inv[1]&sel_inv[0]);
+   assign Rsign = SignR;   
+      
+   // The exponent of the final result is zero if the final result is 
+   // zero or a denorm, all ones if the final result is NaN or Infinite
+   // or overflow occurred and the magnitude of the number is 
+   // not rounded toward zero, and all ones with an LSB of zero
+   // if overflow occurred and the magnitude of the number is 
+   // rounded toward zero. If the result is single precision, 
+   // Texp[7] should be inverted. When the Overflow trap is enabled (OvEn = 1)
+   // and overflow occurs and the operation is not conversion, bits 10 and 9 are 
+   // inverted for double precision, and bits 7 and 6 are inverted for single precision. 
+   assign Round_zero = ~rm[1]&rm[0] | ~SignR&rm[0] | SignR&rm[1]&~rm[0];
+   assign VeryLarge = OverFlow & ~OvEn;
+   assign Infinite   = (VeryLarge & ~Round_zero) | sel_inv[1];
+   assign Largest = VeryLarge & Round_zero;
+   assign Adj_exp = OverFlow & OvEn;
+   assign Rexp[10:1] = ({10{~Valid}} | 
+			{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], 
+			 (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | 
+		        {10{VeryLarge}})&{10{~Rzero | NaN}};
+   assign Rexp[0]    = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
+   
+   // If the result is zero or infinity, the mantissa is all zeros. 
+   // If the result is NaN, the mantissa is 1 followed by the low 51 bits of
+   // the NaN operand (Float2 when only Y is NaN, otherwise Float1).
+   // If the result is the largest floating point number, the mantissa
+   // is all ones. Otherwise, the mantissa is not changed.
+   assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
+   assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
+   assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ | 
+   		     NaN_Sign_out&(XNaNQ|YNaNQ);
+   // FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
+   // | Float1[63]&op_type;  (logic to fix this but removed for now)
+   
+   // Rmant[50:0] is additionally cleared when op_type is 1, Float1's sign
+   // bit is set and X is not zero.
+   // NOTE(review): presumably the square root of a negative operand - confirm.
+   assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
+   assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
+			(NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});
+   
+   // For single precision, the 8 least significant bits of the exponent
+   // and 23 most significant bits of the mantissa contain bits used 
+   // for the final result. A double precision result is returned if 
+   // overflow has occurred and the overflow trap is enabled (OvCon).
+   // The single-precision result fills the upper 32 bits with ones.
+   assign OvCon = OverFlow & OvEn;
+   assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
+	           : {Sign_out, Rexp, Rmant};
+
+endmodule // rounder_div
+
--- a/pipelined/src/fpu/sbtm_a0.sv
+++ b/pipelined/src/fpu/sbtm_a0.sv
@ -0,0 +1,170 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Bipartite Lookup
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// sbtm_a0: combinational lookup table.  The 7-bit index a selects one of
+// 128 fixed 13-bit constants, driven on y.  Every index value is listed, so
+// the default arm (all X) is only taken when a contains X or Z bits.
+// Index labels are written in decimal; table values are kept in binary.
+module sbtm_a0 (
+   input  logic [6:0]  a,
+   output logic [12:0] y
+);
+
+   always_comb begin
+      case (a)
+        7'd0:   y = 13'b1111111100010;
+        7'd1:   y = 13'b1111110100011;
+        7'd2:   y = 13'b1111101100101;
+        7'd3:   y = 13'b1111100101000;
+        7'd4:   y = 13'b1111011101100;
+        7'd5:   y = 13'b1111010110000;
+        7'd6:   y = 13'b1111001110110;
+        7'd7:   y = 13'b1111000111100;
+        7'd8:   y = 13'b1111000000100;
+        7'd9:   y = 13'b1110111001100;
+        7'd10:  y = 13'b1110110010101;
+        7'd11:  y = 13'b1110101011110;
+        7'd12:  y = 13'b1110100101001;
+        7'd13:  y = 13'b1110011110100;
+        7'd14:  y = 13'b1110011000000;
+        7'd15:  y = 13'b1110010001101;
+        7'd16:  y = 13'b1110001011010;
+        7'd17:  y = 13'b1110000101000;
+        7'd18:  y = 13'b1101111110111;
+        7'd19:  y = 13'b1101111000110;
+        7'd20:  y = 13'b1101110010111;
+        7'd21:  y = 13'b1101101100111;
+        7'd22:  y = 13'b1101100111001;
+        7'd23:  y = 13'b1101100001011;
+        7'd24:  y = 13'b1101011011101;
+        7'd25:  y = 13'b1101010110001;
+        7'd26:  y = 13'b1101010000100;
+        7'd27:  y = 13'b1101001011001;
+        7'd28:  y = 13'b1101000101110;
+        7'd29:  y = 13'b1101000000011;
+        7'd30:  y = 13'b1100111011001;
+        7'd31:  y = 13'b1100110101111;
+        7'd32:  y = 13'b1100110000110;
+        7'd33:  y = 13'b1100101011110;
+        7'd34:  y = 13'b1100100110110;
+        7'd35:  y = 13'b1100100001111;
+        7'd36:  y = 13'b1100011101000;
+        7'd37:  y = 13'b1100011000001;
+        7'd38:  y = 13'b1100010011011;
+        7'd39:  y = 13'b1100001110101;
+        7'd40:  y = 13'b1100001010000;
+        7'd41:  y = 13'b1100000101011;
+        7'd42:  y = 13'b1100000000111;
+        7'd43:  y = 13'b1011111100011;
+        7'd44:  y = 13'b1011111000000;
+        7'd45:  y = 13'b1011110011101;
+        7'd46:  y = 13'b1011101111010;
+        7'd47:  y = 13'b1011101011000;
+        7'd48:  y = 13'b1011100110110;
+        7'd49:  y = 13'b1011100010101;
+        7'd50:  y = 13'b1011011110011;
+        7'd51:  y = 13'b1011011010011;
+        7'd52:  y = 13'b1011010110010;
+        7'd53:  y = 13'b1011010010010;
+        7'd54:  y = 13'b1011001110011;
+        7'd55:  y = 13'b1011001010011;
+        7'd56:  y = 13'b1011000110100;
+        7'd57:  y = 13'b1011000010110;
+        7'd58:  y = 13'b1010111110111;
+        7'd59:  y = 13'b1010111011001;
+        7'd60:  y = 13'b1010110111100;
+        7'd61:  y = 13'b1010110011110;
+        7'd62:  y = 13'b1010110000001;
+        7'd63:  y = 13'b1010101100100;
+        7'd64:  y = 13'b1010101001000;
+        7'd65:  y = 13'b1010100101100;
+        7'd66:  y = 13'b1010100010000;
+        7'd67:  y = 13'b1010011110100;
+        7'd68:  y = 13'b1010011011001;
+        7'd69:  y = 13'b1010010111110;
+        7'd70:  y = 13'b1010010100011;
+        7'd71:  y = 13'b1010010001001;
+        7'd72:  y = 13'b1010001101111;
+        7'd73:  y = 13'b1010001010101;
+        7'd74:  y = 13'b1010000111011;
+        7'd75:  y = 13'b1010000100001;
+        7'd76:  y = 13'b1010000001000;
+        7'd77:  y = 13'b1001111101111;
+        7'd78:  y = 13'b1001111010111;
+        7'd79:  y = 13'b1001110111110;
+        7'd80:  y = 13'b1001110100110;
+        7'd81:  y = 13'b1001110001110;
+        7'd82:  y = 13'b1001101110110;
+        7'd83:  y = 13'b1001101011111;
+        7'd84:  y = 13'b1001101000111;
+        7'd85:  y = 13'b1001100110000;
+        7'd86:  y = 13'b1001100011001;
+        7'd87:  y = 13'b1001100000010;
+        7'd88:  y = 13'b1001011101100;
+        7'd89:  y = 13'b1001011010110;
+        7'd90:  y = 13'b1001011000000;
+        7'd91:  y = 13'b1001010101010;
+        7'd92:  y = 13'b1001010010100;
+        7'd93:  y = 13'b1001001111111;
+        7'd94:  y = 13'b1001001101001;
+        7'd95:  y = 13'b1001001010100;
+        7'd96:  y = 13'b1001000111111;
+        7'd97:  y = 13'b1001000101011;
+        7'd98:  y = 13'b1001000010110;
+        7'd99:  y = 13'b1001000000010;
+        7'd100: y = 13'b1000111101110;
+        7'd101: y = 13'b1000111011010;
+        7'd102: y = 13'b1000111000110;
+        7'd103: y = 13'b1000110110010;
+        7'd104: y = 13'b1000110011111;
+        7'd105: y = 13'b1000110001011;
+        7'd106: y = 13'b1000101111000;
+        7'd107: y = 13'b1000101100101;
+        7'd108: y = 13'b1000101010010;
+        7'd109: y = 13'b1000101000000;
+        7'd110: y = 13'b1000100101101;
+        7'd111: y = 13'b1000100011011;
+        7'd112: y = 13'b1000100001001;
+        7'd113: y = 13'b1000011110110;
+        7'd114: y = 13'b1000011100101;
+        7'd115: y = 13'b1000011010011;
+        7'd116: y = 13'b1000011000001;
+        7'd117: y = 13'b1000010110000;
+        7'd118: y = 13'b1000010011110;
+        7'd119: y = 13'b1000010001101;
+        7'd120: y = 13'b1000001111100;
+        7'd121: y = 13'b1000001101011;
+        7'd122: y = 13'b1000001011010;
+        7'd123: y = 13'b1000001001010;
+        7'd124: y = 13'b1000000111001;
+        7'd125: y = 13'b1000000101001;
+        7'd126: y = 13'b1000000011001;
+        7'd127: y = 13'b1000000001001;
+        default: y = 13'bx;
+      endcase // case (a)
+   end
+
+endmodule // sbtm_a0
+
+    
+    
+    
--- a/pipelined/src/fpu/sbtm_a1.sv
+++ b/pipelined/src/fpu/sbtm_a1.sv
@ -0,0 +1,170 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Bipartite Lookup
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// sbtm_a1: purely combinational lookup table that maps a 7-bit index a to
+// a 5-bit value y. All 128 index values are listed explicitly, so the
+// default arm is only taken when a contains X/Z bits.
+// sbtm_div instantiates this module as its second table (mem2), indexed by
+// {a[10:7], a[2:0] conditionally inverted}.
+module sbtm_a1 (input  logic [6:0] a,
+		output logic [4:0] y);
+   
+   always_comb
+     case(a)
+       7'b0000000: y = 5'b11100;
+       7'b0000001: y = 5'b11000;
+       7'b0000010: y = 5'b10100;
+       7'b0000011: y = 5'b10000;
+       7'b0000100: y = 5'b01101;
+       7'b0000101: y = 5'b01001;
+       7'b0000110: y = 5'b00101;
+       7'b0000111: y = 5'b00001;
+       7'b0001000: y = 5'b11001;
+       7'b0001001: y = 5'b10101;
+       7'b0001010: y = 5'b10010;
+       7'b0001011: y = 5'b01111;
+       7'b0001100: y = 5'b01011;
+       7'b0001101: y = 5'b01000;
+       7'b0001110: y = 5'b00101;
+       7'b0001111: y = 5'b00001;
+       7'b0010000: y = 5'b10110;
+       7'b0010001: y = 5'b10011;
+       7'b0010010: y = 5'b10000;
+       7'b0010011: y = 5'b01101;
+       7'b0010100: y = 5'b01010;
+       7'b0010101: y = 5'b00111;
+       7'b0010110: y = 5'b00100;
+       7'b0010111: y = 5'b00001;
+       7'b0011000: y = 5'b10100;
+       7'b0011001: y = 5'b10001;
+       7'b0011010: y = 5'b01110;
+       7'b0011011: y = 5'b01100;
+       7'b0011100: y = 5'b01001;
+       7'b0011101: y = 5'b00110;
+       7'b0011110: y = 5'b00100;
+       7'b0011111: y = 5'b00001;
+       7'b0100000: y = 5'b10010;
+       7'b0100001: y = 5'b01111;
+       7'b0100010: y = 5'b01101;
+       7'b0100011: y = 5'b01010;
+       7'b0100100: y = 5'b01000;
+       7'b0100101: y = 5'b00110;
+       7'b0100110: y = 5'b00011;
+       7'b0100111: y = 5'b00001;
+       7'b0101000: y = 5'b10000;
+       7'b0101001: y = 5'b01110;
+       7'b0101010: y = 5'b01100;
+       7'b0101011: y = 5'b01001;
+       7'b0101100: y = 5'b00111;
+       7'b0101101: y = 5'b00101;
+       7'b0101110: y = 5'b00011;
+       7'b0101111: y = 5'b00001;
+       7'b0110000: y = 5'b01111;
+       7'b0110001: y = 5'b01101;
+       7'b0110010: y = 5'b01011;
+       7'b0110011: y = 5'b01001;
+       7'b0110100: y = 5'b00111;
+       7'b0110101: y = 5'b00101;
+       7'b0110110: y = 5'b00011;
+       7'b0110111: y = 5'b00001;
+       7'b0111000: y = 5'b01101;
+       7'b0111001: y = 5'b01100;
+       7'b0111010: y = 5'b01010;
+       7'b0111011: y = 5'b01000;
+       7'b0111100: y = 5'b00110;
+       7'b0111101: y = 5'b00100;
+       7'b0111110: y = 5'b00010;
+       7'b0111111: y = 5'b00000;
+       7'b1000000: y = 5'b01100;
+       7'b1000001: y = 5'b01011;
+       7'b1000010: y = 5'b01001;
+       7'b1000011: y = 5'b00111;
+       7'b1000100: y = 5'b00101;
+       7'b1000101: y = 5'b00100;
+       7'b1000110: y = 5'b00010;
+       7'b1000111: y = 5'b00000;
+       7'b1001000: y = 5'b01011;
+       7'b1001001: y = 5'b01010;
+       7'b1001010: y = 5'b01000;
+       7'b1001011: y = 5'b00111;
+       7'b1001100: y = 5'b00101;
+       7'b1001101: y = 5'b00011;
+       7'b1001110: y = 5'b00010;
+       7'b1001111: y = 5'b00000;
+       7'b1010000: y = 5'b01010;
+       7'b1010001: y = 5'b01001;
+       7'b1010010: y = 5'b01000;
+       7'b1010011: y = 5'b00110;
+       7'b1010100: y = 5'b00101;
+       7'b1010101: y = 5'b00011;
+       7'b1010110: y = 5'b00010;
+       7'b1010111: y = 5'b00000;
+       7'b1011000: y = 5'b01010;
+       7'b1011001: y = 5'b01000;
+       7'b1011010: y = 5'b00111;
+       7'b1011011: y = 5'b00110;
+       7'b1011100: y = 5'b00100;
+       7'b1011101: y = 5'b00011;
+       7'b1011110: y = 5'b00010;
+       7'b1011111: y = 5'b00000;
+       7'b1100000: y = 5'b01001;
+       7'b1100001: y = 5'b01000;
+       7'b1100010: y = 5'b00110;
+       7'b1100011: y = 5'b00101;
+       7'b1100100: y = 5'b00100;
+       7'b1100101: y = 5'b00011;
+       7'b1100110: y = 5'b00001;
+       7'b1100111: y = 5'b00000;
+       7'b1101000: y = 5'b01000;
+       7'b1101001: y = 5'b00111;
+       7'b1101010: y = 5'b00110;
+       7'b1101011: y = 5'b00101;
+       7'b1101100: y = 5'b00100;
+       7'b1101101: y = 5'b00010;
+       7'b1101110: y = 5'b00001;
+       7'b1101111: y = 5'b00000;
+       7'b1110000: y = 5'b01000;
+       7'b1110001: y = 5'b00111;
+       7'b1110010: y = 5'b00110;
+       7'b1110011: y = 5'b00100;
+       7'b1110100: y = 5'b00011;
+       7'b1110101: y = 5'b00010;
+       7'b1110110: y = 5'b00001;
+       7'b1110111: y = 5'b00000;
+       7'b1111000: y = 5'b00111;
+       7'b1111001: y = 5'b00110;
+       7'b1111010: y = 5'b00101;
+       7'b1111011: y = 5'b00100;
+       7'b1111100: y = 5'b00011;
+       7'b1111101: y = 5'b00010;
+       7'b1111110: y = 5'b00001;
+       7'b1111111: y = 5'b00000;	    
+       // reached only when a has X/Z bits: propagate unknown
+       default: y = 5'bxxxxx;
+     endcase // case (a)
+    
+endmodule // sbtm_a1
+
+    
+    
+    
--- a/pipelined/src/fpu/sbtm_a2.sv
+++ b/pipelined/src/fpu/sbtm_a2.sv
@ -0,0 +1,234 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Bipartite Lookup
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// sbtm_a2: purely combinational lookup table that maps an 8-bit index a to
+// a 14-bit value y. Entries exist only for a = 8'b01000000 .. 8'b11111111;
+// an index with a[7:6] == 2'b00 (or one containing X/Z bits) takes the
+// default arm and drives y to X.
+// sbtm_sqrt instantiates this module as its first table (mem1), indexed by
+// bits [11:4] of its own input.
+// NOTE(review): the values look like reciprocal-square-root samples over
+// the covered index range -- confirm against the table generator.
+module sbtm_a2 (input  logic [7:0] a,
+		output logic [13:0] y);
+   
+   always_comb
+     case(a)
+       8'b01000000: y = 14'b10110100010111;
+       8'b01000001: y = 14'b10110010111111;
+       8'b01000010: y = 14'b10110001101000;
+       8'b01000011: y = 14'b10110000010011;
+       8'b01000100: y = 14'b10101111000001;
+       8'b01000101: y = 14'b10101101110000;
+       8'b01000110: y = 14'b10101100100001;
+       8'b01000111: y = 14'b10101011010011;
+       8'b01001000: y = 14'b10101010000111;
+       8'b01001001: y = 14'b10101000111101;
+       8'b01001010: y = 14'b10100111110100;
+       8'b01001011: y = 14'b10100110101101;
+       8'b01001100: y = 14'b10100101100111;
+       8'b01001101: y = 14'b10100100100010;
+       8'b01001110: y = 14'b10100011011111;
+       8'b01001111: y = 14'b10100010011101;
+       8'b01010000: y = 14'b10100001011100;
+       8'b01010001: y = 14'b10100000011100;
+       8'b01010010: y = 14'b10011111011110;
+       8'b01010011: y = 14'b10011110100001;
+       8'b01010100: y = 14'b10011101100100;
+       8'b01010101: y = 14'b10011100101001;
+       8'b01010110: y = 14'b10011011101111;
+       8'b01010111: y = 14'b10011010110110;
+       8'b01011000: y = 14'b10011001111110;
+       8'b01011001: y = 14'b10011001000110;
+       8'b01011010: y = 14'b10011000010000;
+       8'b01011011: y = 14'b10010111011011;
+       8'b01011100: y = 14'b10010110100110;
+       8'b01011101: y = 14'b10010101110011;
+       8'b01011110: y = 14'b10010101000000;
+       8'b01011111: y = 14'b10010100001110;
+       8'b01100000: y = 14'b10010011011100;
+       8'b01100001: y = 14'b10010010101100;
+       8'b01100010: y = 14'b10010001111100;
+       8'b01100011: y = 14'b10010001001101;
+       8'b01100100: y = 14'b10010000011111;
+       8'b01100101: y = 14'b10001111110001;
+       8'b01100110: y = 14'b10001111000100;
+       8'b01100111: y = 14'b10001110011000;
+       8'b01101000: y = 14'b10001101101100;
+       8'b01101001: y = 14'b10001101000001;
+       8'b01101010: y = 14'b10001100010110;
+       8'b01101011: y = 14'b10001011101100;
+       8'b01101100: y = 14'b10001011000011;
+       8'b01101101: y = 14'b10001010011010;
+       8'b01101110: y = 14'b10001001110010;
+       8'b01101111: y = 14'b10001001001010;
+       8'b01110000: y = 14'b10001000100011;
+       8'b01110001: y = 14'b10000111111101;
+       8'b01110010: y = 14'b10000111010111;
+       8'b01110011: y = 14'b10000110110001;
+       8'b01110100: y = 14'b10000110001100;
+       8'b01110101: y = 14'b10000101100111;
+       8'b01110110: y = 14'b10000101000011;
+       8'b01110111: y = 14'b10000100011111;
+       8'b01111000: y = 14'b10000011111100;
+       8'b01111001: y = 14'b10000011011001;
+       8'b01111010: y = 14'b10000010110111;
+       8'b01111011: y = 14'b10000010010101;
+       8'b01111100: y = 14'b10000001110011;
+       8'b01111101: y = 14'b10000001010010;
+       8'b01111110: y = 14'b10000000110001;
+       8'b01111111: y = 14'b10000000010001;       
+       8'b10000000: y = 14'b01111111110001;
+       8'b10000001: y = 14'b01111111010001;
+       8'b10000010: y = 14'b01111110110010;
+       8'b10000011: y = 14'b01111110010011;
+       8'b10000100: y = 14'b01111101110101;
+       8'b10000101: y = 14'b01111101010110;
+       8'b10000110: y = 14'b01111100111001;
+       8'b10000111: y = 14'b01111100011011;
+       8'b10001000: y = 14'b01111011111110;
+       8'b10001001: y = 14'b01111011100001;
+       8'b10001010: y = 14'b01111011000100;
+       8'b10001011: y = 14'b01111010101000;
+       8'b10001100: y = 14'b01111010001100;
+       8'b10001101: y = 14'b01111001110000;
+       8'b10001110: y = 14'b01111001010101;
+       8'b10001111: y = 14'b01111000111010;
+       8'b10010000: y = 14'b01111000011111;
+       8'b10010001: y = 14'b01111000000100;
+       8'b10010010: y = 14'b01110111101010;
+       8'b10010011: y = 14'b01110111010000;
+       8'b10010100: y = 14'b01110110110110;
+       8'b10010101: y = 14'b01110110011101;
+       8'b10010110: y = 14'b01110110000100;
+       8'b10010111: y = 14'b01110101101011;
+       8'b10011000: y = 14'b01110101010010;
+       8'b10011001: y = 14'b01110100111001;
+       8'b10011010: y = 14'b01110100100001;
+       8'b10011011: y = 14'b01110100001001;
+       8'b10011100: y = 14'b01110011110001;
+       8'b10011101: y = 14'b01110011011010;
+       8'b10011110: y = 14'b01110011000010;
+       8'b10011111: y = 14'b01110010101011;
+       8'b10100000: y = 14'b01110010010100;
+       8'b10100001: y = 14'b01110001111110;
+       8'b10100010: y = 14'b01110001100111;
+       8'b10100011: y = 14'b01110001010001;
+       8'b10100100: y = 14'b01110000111011;
+       8'b10100101: y = 14'b01110000100101;
+       8'b10100110: y = 14'b01110000001111;
+       8'b10100111: y = 14'b01101111111010;
+       8'b10101000: y = 14'b01101111100101;
+       8'b10101001: y = 14'b01101111010000;
+       8'b10101010: y = 14'b01101110111011;
+       8'b10101011: y = 14'b01101110100110;
+       8'b10101100: y = 14'b01101110010001;
+       8'b10101101: y = 14'b01101101111101;
+       8'b10101110: y = 14'b01101101101001;
+       8'b10101111: y = 14'b01101101010101;
+       8'b10110000: y = 14'b01101101000001;
+       8'b10110001: y = 14'b01101100101101;
+       8'b10110010: y = 14'b01101100011010;
+       8'b10110011: y = 14'b01101100000110;
+       8'b10110100: y = 14'b01101011110011;
+       8'b10110101: y = 14'b01101011100000;
+       8'b10110110: y = 14'b01101011001101;
+       8'b10110111: y = 14'b01101010111010;
+       8'b10111000: y = 14'b01101010101000;
+       8'b10111001: y = 14'b01101010010101;
+       8'b10111010: y = 14'b01101010000011;
+       8'b10111011: y = 14'b01101001110001;
+       8'b10111100: y = 14'b01101001011111;
+       8'b10111101: y = 14'b01101001001101;
+       8'b10111110: y = 14'b01101000111100;
+       8'b10111111: y = 14'b01101000101010;
+       8'b11000000: y = 14'b01101000011001;
+       8'b11000001: y = 14'b01101000000111;
+       8'b11000010: y = 14'b01100111110110;
+       8'b11000011: y = 14'b01100111100101;
+       8'b11000100: y = 14'b01100111010100;
+       8'b11000101: y = 14'b01100111000011;
+       8'b11000110: y = 14'b01100110110011;
+       8'b11000111: y = 14'b01100110100010;
+       8'b11001000: y = 14'b01100110010010;
+       8'b11001001: y = 14'b01100110000010;
+       8'b11001010: y = 14'b01100101110010;
+       8'b11001011: y = 14'b01100101100001;
+       8'b11001100: y = 14'b01100101010010;
+       8'b11001101: y = 14'b01100101000010;
+       8'b11001110: y = 14'b01100100110010;
+       8'b11001111: y = 14'b01100100100011;
+       8'b11010000: y = 14'b01100100010011;
+       8'b11010001: y = 14'b01100100000100;
+       8'b11010010: y = 14'b01100011110101;
+       8'b11010011: y = 14'b01100011100101;
+       8'b11010100: y = 14'b01100011010110;
+       8'b11010101: y = 14'b01100011000111;
+       8'b11010110: y = 14'b01100010111001;
+       8'b11010111: y = 14'b01100010101010;
+       8'b11011000: y = 14'b01100010011011;
+       8'b11011001: y = 14'b01100010001101;
+       8'b11011010: y = 14'b01100001111110;
+       8'b11011011: y = 14'b01100001110000;
+       8'b11011100: y = 14'b01100001100010;
+       8'b11011101: y = 14'b01100001010100;
+       8'b11011110: y = 14'b01100001000110;
+       8'b11011111: y = 14'b01100000111000;
+       8'b11100000: y = 14'b01100000101010;
+       8'b11100001: y = 14'b01100000011100;
+       8'b11100010: y = 14'b01100000001111;
+       8'b11100011: y = 14'b01100000000001;
+       8'b11100100: y = 14'b01011111110100;
+       8'b11100101: y = 14'b01011111100110;
+       8'b11100110: y = 14'b01011111011001;
+       8'b11100111: y = 14'b01011111001100;
+       8'b11101000: y = 14'b01011110111111;
+       8'b11101001: y = 14'b01011110110010;
+       8'b11101010: y = 14'b01011110100101;
+       8'b11101011: y = 14'b01011110011000;
+       8'b11101100: y = 14'b01011110001011;
+       8'b11101101: y = 14'b01011101111110;
+       8'b11101110: y = 14'b01011101110010;
+       8'b11101111: y = 14'b01011101100101;
+       8'b11110000: y = 14'b01011101011001;
+       8'b11110001: y = 14'b01011101001100;
+       8'b11110010: y = 14'b01011101000000;
+       8'b11110011: y = 14'b01011100110100;
+       8'b11110100: y = 14'b01011100101000;
+       8'b11110101: y = 14'b01011100011100;
+       8'b11110110: y = 14'b01011100010000;
+       8'b11110111: y = 14'b01011100000100;
+       8'b11111000: y = 14'b01011011111000;
+       8'b11111001: y = 14'b01011011101100;
+       8'b11111010: y = 14'b01011011100000;
+       8'b11111011: y = 14'b01011011010101;
+       8'b11111100: y = 14'b01011011001001;
+       8'b11111101: y = 14'b01011010111101;
+       8'b11111110: y = 14'b01011010110010;
+       8'b11111111: y = 14'b01011010100111;
+       // a below 8'b01000000 (or with X/Z bits) has no entry: drive unknown
+       default: y = 14'bxxxxxxxxxxxxxx;
+     endcase // case (a)
+    
+endmodule // sbtm_a2
+
+    
+    
+    
--- a/pipelined/src/fpu/sbtm_a3.sv
+++ b/pipelined/src/fpu/sbtm_a3.sv
@ -0,0 +1,230 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Bipartite Lookup
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// sbtm_a3: purely combinational lookup table that maps an 8-bit index a to
+// a 6-bit value y. Entries exist only for a = 8'b01000000 .. 8'b11111111;
+// an index with a[7:6] == 2'b00 (or one containing X/Z bits) takes the
+// default arm and drives y to X.
+// sbtm_sqrt instantiates this module as its second table (mem2), indexed by
+// {a[11:7], a[2:0] conditionally inverted} of its own input.
+module sbtm_a3 (input  logic [7:0] a,
+		output logic [5:0] y);
+   
+   always_comb
+     case(a)
+       8'b01000000: y = 6'b100110;
+       8'b01000001: y = 6'b100001;
+       8'b01000010: y = 6'b011100;
+       8'b01000011: y = 6'b010111;
+       8'b01000100: y = 6'b010010;
+       8'b01000101: y = 6'b001100;
+       8'b01000110: y = 6'b000111;
+       8'b01000111: y = 6'b000010;
+       8'b01001000: y = 6'b100000;
+       8'b01001001: y = 6'b011100;
+       8'b01001010: y = 6'b011000;
+       8'b01001011: y = 6'b010011;
+       8'b01001100: y = 6'b001111;
+       8'b01001101: y = 6'b001010;
+       8'b01001110: y = 6'b000110;
+       8'b01001111: y = 6'b000010;
+       8'b01010000: y = 6'b011100;
+       8'b01010001: y = 6'b011000;
+       8'b01010010: y = 6'b010100;
+       8'b01010011: y = 6'b010000;
+       8'b01010100: y = 6'b001101;
+       8'b01010101: y = 6'b001001;
+       8'b01010110: y = 6'b000101;
+       8'b01010111: y = 6'b000001;
+       8'b01011000: y = 6'b011000;
+       8'b01011001: y = 6'b010101;
+       8'b01011010: y = 6'b010010;
+       8'b01011011: y = 6'b001110;
+       8'b01011100: y = 6'b001011;
+       8'b01011101: y = 6'b001000;
+       8'b01011110: y = 6'b000100;
+       8'b01011111: y = 6'b000001;
+       8'b01100000: y = 6'b010101;
+       8'b01100001: y = 6'b010010;
+       8'b01100010: y = 6'b001111;
+       8'b01100011: y = 6'b001101;
+       8'b01100100: y = 6'b001010;
+       8'b01100101: y = 6'b000111;
+       8'b01100110: y = 6'b000100;
+       8'b01100111: y = 6'b000001;
+       8'b01101000: y = 6'b010011;
+       8'b01101001: y = 6'b010000;
+       8'b01101010: y = 6'b001110;
+       8'b01101011: y = 6'b001011;
+       8'b01101100: y = 6'b001001;
+       8'b01101101: y = 6'b000110;
+       8'b01101110: y = 6'b000011;
+       8'b01101111: y = 6'b000001;
+       8'b01110000: y = 6'b010001;
+       8'b01110001: y = 6'b001111;
+       8'b01110010: y = 6'b001100;
+       8'b01110011: y = 6'b001010;
+       8'b01110100: y = 6'b001000;
+       8'b01110101: y = 6'b000101;
+       8'b01110110: y = 6'b000011;
+       8'b01110111: y = 6'b000001;
+       8'b01111000: y = 6'b001111;
+       8'b01111001: y = 6'b001101;
+       8'b01111010: y = 6'b001011;
+       8'b01111011: y = 6'b001001;
+       8'b01111100: y = 6'b000111;
+       8'b01111101: y = 6'b000101;
+       8'b01111110: y = 6'b000011;
+       8'b01111111: y = 6'b000001;       
+       8'b10000000: y = 6'b001110;
+       8'b10000001: y = 6'b001100;
+       8'b10000010: y = 6'b001010;
+       8'b10000011: y = 6'b001000;
+       8'b10000100: y = 6'b000110;
+       8'b10000101: y = 6'b000100;
+       8'b10000110: y = 6'b000010;
+       8'b10000111: y = 6'b000000;
+       8'b10001000: y = 6'b001101;
+       8'b10001001: y = 6'b001011;
+       8'b10001010: y = 6'b001001;
+       8'b10001011: y = 6'b000111;
+       8'b10001100: y = 6'b000110;
+       8'b10001101: y = 6'b000100;
+       8'b10001110: y = 6'b000010;
+       8'b10001111: y = 6'b000000;
+       8'b10010000: y = 6'b001100;
+       8'b10010001: y = 6'b001010;
+       8'b10010010: y = 6'b001000;
+       8'b10010011: y = 6'b000111;
+       8'b10010100: y = 6'b000101;
+       8'b10010101: y = 6'b000100;
+       8'b10010110: y = 6'b000010;
+       8'b10010111: y = 6'b000000;
+       8'b10011000: y = 6'b001011;
+       8'b10011001: y = 6'b001001;
+       8'b10011010: y = 6'b001000;
+       8'b10011011: y = 6'b000110;
+       8'b10011100: y = 6'b000101;
+       8'b10011101: y = 6'b000011;
+       8'b10011110: y = 6'b000010;
+       8'b10011111: y = 6'b000000;
+       8'b10100000: y = 6'b001010;
+       8'b10100001: y = 6'b001000;
+       8'b10100010: y = 6'b000111;
+       8'b10100011: y = 6'b000110;
+       8'b10100100: y = 6'b000100;
+       8'b10100101: y = 6'b000011;
+       8'b10100110: y = 6'b000010;
+       8'b10100111: y = 6'b000000;
+       8'b10101000: y = 6'b001001;
+       8'b10101001: y = 6'b001000;
+       8'b10101010: y = 6'b000111;
+       8'b10101011: y = 6'b000101;
+       8'b10101100: y = 6'b000100;
+       8'b10101101: y = 6'b000011;
+       8'b10101110: y = 6'b000001;
+       8'b10101111: y = 6'b000000;
+       8'b10110000: y = 6'b001000;
+       8'b10110001: y = 6'b000111;
+       8'b10110010: y = 6'b000110;
+       8'b10110011: y = 6'b000101;
+       8'b10110100: y = 6'b000100;
+       8'b10110101: y = 6'b000010;
+       8'b10110110: y = 6'b000001;
+       8'b10110111: y = 6'b000000;
+       8'b10111000: y = 6'b001000;
+       8'b10111001: y = 6'b000111;
+       8'b10111010: y = 6'b000110;
+       8'b10111011: y = 6'b000101;
+       8'b10111100: y = 6'b000011;
+       8'b10111101: y = 6'b000010;
+       8'b10111110: y = 6'b000001;
+       8'b10111111: y = 6'b000000;
+       8'b11000000: y = 6'b000111;
+       8'b11000001: y = 6'b000110;
+       8'b11000010: y = 6'b000101;
+       8'b11000011: y = 6'b000100;
+       8'b11000100: y = 6'b000011;
+       8'b11000101: y = 6'b000010;
+       8'b11000110: y = 6'b000001;
+       8'b11000111: y = 6'b000000;
+       8'b11001000: y = 6'b000111;
+       8'b11001001: y = 6'b000110;
+       8'b11001010: y = 6'b000101;
+       8'b11001011: y = 6'b000100;
+       8'b11001100: y = 6'b000011;
+       8'b11001101: y = 6'b000010;
+       8'b11001110: y = 6'b000001;
+       8'b11001111: y = 6'b000000;
+       8'b11010000: y = 6'b000111;
+       8'b11010001: y = 6'b000110;
+       8'b11010010: y = 6'b000101;
+       8'b11010011: y = 6'b000100;
+       8'b11010100: y = 6'b000011;
+       8'b11010101: y = 6'b000010;
+       8'b11010110: y = 6'b000001;
+       8'b11010111: y = 6'b000000;
+       8'b11011000: y = 6'b000110;
+       8'b11011001: y = 6'b000101;
+       8'b11011010: y = 6'b000100;
+       8'b11011011: y = 6'b000011;
+       8'b11011100: y = 6'b000011;
+       8'b11011101: y = 6'b000010;
+       8'b11011110: y = 6'b000001;
+       8'b11011111: y = 6'b000000;
+       8'b11100000: y = 6'b000110;
+       8'b11100001: y = 6'b000101;
+       8'b11100010: y = 6'b000100;
+       8'b11100011: y = 6'b000011;
+       8'b11100100: y = 6'b000010;
+       8'b11100101: y = 6'b000010;
+       8'b11100110: y = 6'b000001;
+       8'b11100111: y = 6'b000000;
+       8'b11101000: y = 6'b000101;
+       8'b11101001: y = 6'b000101;
+       8'b11101010: y = 6'b000100;
+       8'b11101011: y = 6'b000011;
+       8'b11101100: y = 6'b000010;
+       8'b11101101: y = 6'b000001;
+       8'b11101110: y = 6'b000001;
+       8'b11101111: y = 6'b000000;
+       8'b11110000: y = 6'b000101;
+       8'b11110001: y = 6'b000100;
+       8'b11110010: y = 6'b000100;
+       8'b11110011: y = 6'b000011;
+       8'b11110100: y = 6'b000010;
+       8'b11110101: y = 6'b000001;
+       8'b11110110: y = 6'b000001;
+       8'b11110111: y = 6'b000000;
+       8'b11111000: y = 6'b000101;
+       8'b11111001: y = 6'b000100;
+       8'b11111010: y = 6'b000011;
+       8'b11111011: y = 6'b000011;
+       8'b11111100: y = 6'b000010;
+       8'b11111101: y = 6'b000001;
+       8'b11111110: y = 6'b000001;
+       8'b11111111: y = 6'b000000;
+       // a below 8'b01000000 (or with X/Z bits) has no entry: drive unknown
+       default: y = 6'bxxxxxx;
+     endcase // case (a)
+    
+endmodule // sbtm_a3
--- a/pipelined/src/fpu/sbtm_div.sv
+++ b/pipelined/src/fpu/sbtm_div.sv
@ -0,0 +1,62 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Bipartite Lookup for divide portion of fpdivsqrt
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
+
+   // Bipartite lookup: a coarse table value (sbtm_a0) plus a small signed
+   // correction (sbtm_a1), summed and truncated to 11 bits.
+   // Only a[10:0] is decoded here; a[11] is not read by this module.
+
+   // address fields of a
+   logic [3:0]  hi;       // a[10:7], shared by both table addresses
+   logic [2:0]  mid;      // a[6:4], low part of the first table's address
+   logic        fold;     // a[3], selects the complemented half
+   logic [2:0]  lo;       // a[2:0]
+   logic [2:0]  lo_idx;   // lo, bitwise inverted when fold is set
+   // table outputs
+   logic [12:0] coarse;
+   logic [4:0]  delta;
+   // adder operands and result
+   logic [14:0] base;
+   logic [14:0] corr;
+   logic [14:0] sum;
+   logic        carry;    // carry-out of the adder; not used further
+
+   assign {hi, mid, fold, lo} = a[10:0];
+
+   sbtm_a0 mem1 ({hi, mid}, coarse);
+   assign base = {1'b0, coarse, 1'b0};
+
+   // 1s complement of the low index, per sbtm/stam
+   assign lo_idx = lo ^ {3{fold}};
+   sbtm_a1 mem2 ({hi, lo_idx}, delta);
+
+   // fold = 0: corr = {9'b0, delta, 1'b1}
+   // fold = 1: corr = {9 ones, ~delta, 1'b1}  (1s complement per sbtm/stam)
+   assign corr = {{9{fold}}, delta ^ {5{fold}}, 1'b1};
+
+   // CPA; the four low-order bits of the sum are dropped
+   assign {carry, sum} = base + corr;
+   assign ia_out = sum[14:4];
+
+endmodule // sbtm_div
--- a/pipelined/src/fpu/sbtm_sqrt.sv
+++ b/pipelined/src/fpu/sbtm_sqrt.sv
@ -0,0 +1,68 @@
+///////////////////////////////////////////
+//
+// Written: James Stine
+// Modified: 8/1/2018
+//
+// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
+
+   // Bipartite lookup: a coarse table value (sbtm_a2) plus a small signed
+   // correction (sbtm_a3), summed and truncated to 11 bits.
+   // Both tables only hold entries for addresses >= 8'b01000000, so an
+   // input with a[11:10] == 2'b00 yields X from the tables.
+
+   // address fields of a
+   logic [4:0]  hi;       // a[11:7], shared by both table addresses
+   logic [2:0]  mid;      // a[6:4], low part of the first table's address
+   logic        fold;     // a[3], selects the complemented half
+   logic [2:0]  lo;       // a[2:0]
+   logic [2:0]  lo_idx;   // lo, bitwise inverted when fold is set
+   // table outputs
+   logic [13:0] coarse;
+   logic [5:0]  delta;
+   // adder operands and result
+   logic [14:0] base;
+   logic [14:0] corr;
+   logic [14:0] sum;
+   logic        carry;    // carry-out of the adder; not used further
+
+   assign {hi, mid, fold, lo} = a;
+
+   sbtm_a2 mem1 ({hi, mid}, coarse);
+   assign base = {coarse, 1'b0};
+
+   // 1s complement of the low index, per sbtm/stam
+   assign lo_idx = lo ^ {3{fold}};
+   sbtm_a3 mem2 ({hi, lo_idx}, delta);
+
+   // fold = 0: corr = {8'b0, delta, 1'b1}
+   // fold = 1: corr = {8 ones, ~delta, 1'b1}  (1s complement per sbtm/stam)
+   assign corr = {{8{fold}}, delta ^ {6{fold}}, 1'b1};
+
+   // CPA; the four low-order bits of the sum are dropped
+   assign {carry, sum} = base + corr;
+   assign y = sum[14:4];
+
+endmodule // sbtm_sqrt
+
+
+   
+
--- a/pipelined/src/fpu/shifter_denorm.sv
+++ b/pipelined/src/fpu/shifter_denorm.sv
@ -0,0 +1,164 @@
+
+// MJS - This module implements a 57-bit 2-to-1 multiplexor, which is
+// used in the barrel shifter for significand alignment.
+
+module mux21x57 (Z, A, B, Sel);
+
+   // 57-bit 2-to-1 multiplexor: Z takes A when Sel is 0 and B when Sel is 1.
+   output [56:0] Z;
+   input  [56:0] A, B;
+   input         Sel;
+
+   assign Z = Sel ? B : A;
+
+endmodule // mux21x57
+
+// MJS - This module implements a 64-bit 2-to-1 multiplexor, which is
+// used in the barrel shifter for significand normalization. 
+
+module mux21x64 (Z, A, B, Sel);
+
+   // 64-bit 2-to-1 multiplexor: Z takes A when Sel is 0 and B when Sel is 1.
+   output [63:0] Z;
+   input  [63:0] A, B;
+   input         Sel;
+
+   assign Z = Sel ? B : A;
+
+endmodule // mux21x64
+ 
+// The implementation of the barrel shifter was modified to use 
+// fewer gates. It is now implemented using six 64-bit 2-to-1 muxes. The 
+// barrel shifter takes a 64-bit input A and shifts it left by up to 
+// 63 bits, as specified by Shift, to produce a 64-bit output Z. 
+// Bits shifted in from the right are filled with zeros. 
+// The shift is implemented using 6 stages of 32-, 16-, 8-, 4-, 2-,
+// and 1-bit shifts. 
+
+module barrel_shifter_l64 (Z, A, Shift);
+
+   // Left shift of A by Shift positions with zero fill, built as a cascade
+   // of six 2-to-1 mux stages. Each stage either passes its input through
+   // or shifts it left by a fixed amount, selected by one bit of Shift.
+   output [63:0] Z;
+   input  [63:0] A;
+   input  [5:0]  Shift;
+
+   wire [63:0]   stage1, stage2, stage3, stage4, stage5;
+
+   mux21x64  mx01(stage1, A,      {A[31:0],      32'h0}, Shift[5]);  // by 32
+   mux21x64  mx02(stage2, stage1, {stage1[47:0], 16'h0}, Shift[4]);  // by 16
+   mux21x64  mx03(stage3, stage2, {stage2[55:0],  8'h0}, Shift[3]);  // by 8
+   mux21x64  mx04(stage4, stage3, {stage3[59:0],  4'h0}, Shift[2]);  // by 4
+   mux21x64  mx05(stage5, stage4, {stage4[61:0],  2'b00}, Shift[1]); // by 2
+   mux21x64  mx06(Z,      stage5, {stage5[62:0],  1'b0}, Shift[0]);  // by 1
+
+endmodule // barrel_shifter_l64
+
+// The implementation of the barrel shifter was modified to use 
+// fewer gates. It is now implemented using six 57-bit 2-to-1 muxes. The 
+// barrel shifter takes a 57-bit input A and right shifts it by up to 
+// 63 bits, as specified by Shift, to produce a 57-bit output Z. 
+// It also computes a Sticky bit, which is set to 
+// one if any of the bits that were shifted out was one.
+// Bits shifted in from the left are filled with zeros. 
+// The shift is implemented using 6 stages of 32-, 16-, 8-, 4-, 2-,
+// and 1-bit shifts.
+
+module barrel_shifter_r57 (Z, Sticky, A, Shift);
+
+   // Right shift of A by Shift positions with zero fill, built as a cascade
+   // of six 2-to-1 mux stages, plus a Sticky flag that is set when any bit
+   // dropped by an enabled stage was one.
+   output [56:0] Z;
+   output        Sticky;
+   input  [56:0] A;
+   input  [5:0]  Shift;
+
+   wire [56:0]   stage1, stage2, stage3, stage4, stage5;
+   wire [62:0]   S;   // bits dropped by each stage, gated by that stage's select
+
+   // Shift operations
+   mux21x57  mx01(stage1, A,      {32'h0, A[56:32]},      Shift[5]);  // by 32
+   mux21x57  mx02(stage2, stage1, {16'h0, stage1[56:16]}, Shift[4]);  // by 16
+   mux21x57  mx03(stage3, stage2, { 8'h0, stage2[56:8]},  Shift[3]);  // by 8
+   mux21x57  mx04(stage4, stage3, { 4'h0, stage3[56:4]},  Shift[2]);  // by 4
+   mux21x57  mx05(stage5, stage4, { 2'b00, stage4[56:2]}, Shift[1]);  // by 2
+   mux21x57  mx06(Z,      stage5, { 1'b0, stage5[56:1]},  Shift[0]);  // by 1
+
+   // Sticky bit calculation. The Sticky bit is set to one if any of the
+   // bits that were shifted out were one.
+   assign S = {        Shift[0]   & stage5[0],      // S[62]
+                { 2{Shift[1]}} & stage4[1:0],       // S[61:60]
+                { 4{Shift[2]}} & stage3[3:0],       // S[59:56]
+                { 8{Shift[3]}} & stage2[7:0],       // S[55:48]
+                {16{Shift[4]}} & stage1[15:0],      // S[47:32]
+                {32{Shift[5]}} &      A[31:0] };    // S[31:0]
+   assign Sticky = (S != 63'h0);
+
+endmodule // barrel_shifter_r57
+
+/*
+module barrel_shifter_r64 (Z, Sticky, A, Shift);
+   
+   input [63:0] A;
+   input [5:0] 	Shift;
+
+   output 	Sticky;
+   output [63:0] Z;      
+   
+   wire [63:0] 	stage1;
+   wire [63:0] 	stage2;
+   wire [63:0] 	stage3;
+   wire [63:0] 	stage4;
+   wire [63:0] 	stage5;
+   wire [62:0] 	sixtythreezeros = 63'h0;
+   wire [31:0] 	thirtytwozeros = 32'h0;
+   wire [15:0] 	sixteenzeros = 16'h0;
+   wire [ 7:0] 	eightzeros = 8'h0;
+   wire [ 3:0] 	fourzeros = 4'h0;
+   wire [ 1:0] 	twozeros = 2'b00;
+   wire 	onezero = 1'b0;   
+   wire [62:0] 	S;
+
+   // Shift operations
+   mux21x64  mx01(stage1,      A, {thirtytwozeros,    A[63:32]}, Shift[5]);
+   mux21x64  mx02(stage2, stage1, {sixteenzeros, stage1[63:16]}, Shift[4]);
+   mux21x64  mx03(stage3, stage2, {eightzeros, stage2[63:8]}, Shift[3]);
+   mux21x64  mx04(stage4, stage3, {fourzeros, stage3[63:4]}, Shift[2]);
+   mux21x64  mx05(stage5, stage4, {twozeros, stage4[63:2]}, Shift[1]);
+   mux21x64  mx06(Z     , stage5, {onezero, stage5[63:1]}, Shift[0]);
+
+   // Sticky bit calculation. The Sticky bit is set to one if any of the
+   // bits that were shifter out were one
+
+   assign S[31:0]  = {32{Shift[5]}} &      A[31:0];  
+   assign S[47:32] = {16{Shift[4]}} & stage1[15:0];  
+   assign S[55:48] = { 8{Shift[3]}} & stage2[7:0];  
+   assign S[59:56] = { 4{Shift[2]}} & stage3[3:0];  
+   assign S[61:60] = { 2{Shift[1]}} & stage4[1:0];  
+   assign S[62] =        Shift[0]   & stage5[0];  
+   assign Sticky = (S != sixtythreezeros);
+
+endmodule // barrel_shifter_r64
+*/
--- a/pipelined/srt/Makefile
+++ b/pipelined/srt/Makefile
@ -0,0 +1,33 @@
+# Builds and runs the SRT test-vector and quotient-selection-table generators.
+# Every generator target compiles its C source and then runs the resulting
+# binary to produce that generator's output.
+
+.PHONY: all clean
+
+all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen
+
+sqrttestgen: sqrttestgen.c
+	gcc sqrttestgen.c -o sqrttestgen -lm
+	./sqrttestgen
+
+testgen: testgen.c
+	gcc testgen.c -o testgen -lm
+	./testgen
+
+exptestgen: exptestgen.c
+	gcc -o exptestgen exptestgen.c -lm
+	./exptestgen
+
+qslc_r4a2: qslc_r4a2.c
+	gcc qslc_r4a2.c -o qslc_r4a2 -lm
+	./qslc_r4a2 > qslc_r4a2.sv
+
+qslc_r4a2b: qslc_r4a2b.c
+	gcc qslc_r4a2b.c -o qslc_r4a2b -lm
+	./qslc_r4a2b > qslc_r4a2b.tv
+
+qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
+	gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
+	./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
+
+# Libraries go after the source file so the linker resolves symbols from it,
+# matching the other recipes.
+inttestgen: inttestgen.c
+	gcc inttestgen.c -o inttestgen -lm
+	./inttestgen
+
+clean:
+	rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen inttestgen
--- a/pipelined/srt/exptestgen.c
+++ b/pipelined/srt/exptestgen.c
@ -0,0 +1,127 @@
+/* exptestgen.c */
+
+/* Written 2/19/2022 by David Harris
+
+   This program creates test vectors for mantissa and exponent components
+   of an IEEE floating point divider.
+   Builds upon program that creates test vectors for mantissa component only.
+   */
+
+/* #includes */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Constants */
+
+#define ENTRIES  17
+#define RANDOM_VECS 500
+// #define BIAS 1023 // Bias is for double precision
+
+/* Prototypes */
+
+void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac);
+void printhex(FILE *fptr, double x);
+double random_input(void);
+double random_input_e(void);
+
+/* Main */
+
+/* Writes one test vector per (dividend, divisor) pair drawn from the
+   mantissa/exponent tables to the file "testvectors".
+   Returns 0 on success; exits with status 1 if the file cannot be opened. */
+int main(void)
+{
+  FILE *fptr;
+  // aExp & bExp are biased exponents
+  // aFrac & bFrac are mantissas
+  // rFrac is result of fractional division
+  // rExp is result of exponent subtraction, re-biased
+  // aSign, bSign & rSign are the sign bits
+  double aFrac, bFrac, rFrac;
+  int    aExp,  bExp,  rExp;
+  int    aSign, bSign, rSign;
+  double mantissa[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
+			  1.75, 1.875, 1.99999,
+			  1.1, 1.2, 1.01, 1.001, 1.0001,
+			  1/1.1, 1/1.5, 1/1.25, 1/1.125};
+  int exponent[ENTRIES] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
+  int i, j;
+  int bias = 1023;
+
+  if ((fptr = fopen("testvectors","w")) == NULL) { 
+    fprintf(stderr, "Couldn't write testvectors file\n");
+    exit(1);
+  }
+
+  // Outer loop picks the divisor b, inner loop the dividend a.
+  for (i=0; i<ENTRIES; i++) {
+    bFrac = mantissa[i];
+    bExp = exponent[i] + bias;
+    bSign = i%2;
+    for (j=0; j<ENTRIES; j++) {
+      aFrac = mantissa[j];
+      aExp = exponent[j] + bias;
+      aSign = j%2;
+      rFrac = aFrac/bFrac;
+      // NOTE(review): rExp is not decremented when aFrac/bFrac < 1, although
+      // printhex() rescales the fraction into [1,2] - confirm the testbench
+      // expects the exponent before normalization.
+      rExp = aExp - bExp + bias;
+      rSign = (i+j)%2;   // same parity as aSign XOR bSign
+      output(fptr, aSign, aExp, aFrac, bSign, bExp, bFrac, rSign, rExp, rFrac);
+    }
+  }
+  
+  // for (i = 0; i< RANDOM_VECS; i++) {
+  //   aFrac = random_input();
+  //   bFrac = random_input();
+  //   aExp = random_input_e() + BIAS; // make new random input function for exponents
+  //   bExp = random_input_e() + BIAS;
+  //   rFrac = a/b;
+  //   rEx[] = e1 - e2 + BIAS;
+  //   output(fptr, aExp, aFrac, bExp, bFrac, rExp, rFrac);
+  // }
+
+  fclose(fptr);
+  return 0;
+}
+
+/* Functions */
+
+/* Writes one test-vector line to fptr: the three operands a, b and r, each
+   as three hex digits of (sign << 11 | exponent) followed by the mantissa
+   digits from printhex(), separated by '_' and ended by a newline. */
+void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac)
+{
+  const int    sign[3] = {aSign, bSign, rSign};
+  const int    expo[3] = {aExp,  bExp,  rExp};
+  const double frac[3] = {aFrac, bFrac, rFrac};
+  int k;
+
+  for (k = 0; k < 3; k++) {
+    fprintf(fptr, "%03x", expo[k]|(sign[k]<<11));
+    printhex(fptr, frac[k]);
+    // '_' between operands, newline after the last one
+    fprintf(fptr, "%s", (k < 2) ? "_" : "\n");
+  }
+}
+
+/* Rescales m by powers of two until it is no smaller than 1 and no larger
+   than 2, then writes the 52 bits after the binary point to fptr as 13 hex
+   digits. */
+void printhex(FILE *fptr, double m)
+{
+  int digit;
+
+  while (m<1) m *= 2;
+  while (m>2) m /= 2;
+  for (digit = 0; digit < 52/4; digit++) {
+    m -= floor(m);   // drop the integer part, keep the fraction
+    m *= 16;         // next four fraction bits move above the point
+    fprintf(fptr, "%x", (int)(m)%16);
+  }
+}    
+
+/* Returns a pseudo-random value in [1.0, 2.0].
+   rand() is scaled by RAND_MAX rather than a fixed 32767 so the result
+   stays in range whatever the C library's RAND_MAX is. */
+double random_input(void)
+{
+  return 1.0 + rand()/(double)RAND_MAX;
+}
+
+/* Returns a pseudo-random whole number from 1 to 300, as a double. */
+double random_input_e(void)
+{
+  int r = rand() % 300;
+
+  return (double)(r + 1);
+}
+  
--- a/pipelined/srt/inttestgen
+++ b/pipelined/srt/inttestgen
--- a/pipelined/srt/inttestgen.c
+++ b/pipelined/srt/inttestgen.c
@ -0,0 +1,83 @@
+/* inttestgen.c */
+
+/* Written 10/31/96 by David Harris
+
+   This program creates test vectors (dividend, divisor, quotient
+   and remainder) for an integer divider. 
+   */
+
+/* #includes */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Constants */
+
+#define ENTRIES  10
+#define RANDOM_VECS 500
+
+/* Prototypes */
+
+void output(FILE *fptr, long a, long b, long r, long rem);
+void printhex(FILE *fptr, long x);
+double random_input(void);
+
+/* Main */
+
+/* Writes one test vector (a, b, a/b, a%b) for every pair of values from
+   list[] to the file "inttestvectors".
+   Returns 0 on success; exits with status 1 if the file cannot be opened. */
+int main(void)
+{
+  FILE *fptr;
+  long a, b, r, rem;
+  long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
+  int i, j;
+
+  if ((fptr = fopen("inttestvectors","w")) == NULL) {
+    fprintf(stderr, "Couldn't write inttestvectors file\n");
+    exit(1);
+  }
+
+  // Outer loop picks the divisor b, inner loop the dividend a.
+  // list[] holds no zero, so the division below is always defined.
+  for (i=0; i<ENTRIES; i++) {
+    b = list[i];
+    for (j=0; j<ENTRIES; j++) {
+      a = list[j];
+      r = a/b;
+      rem = a%b;
+      output(fptr, a, b, r, rem);
+    }
+  }
+  
+//   for (i = 0; i< RANDOM_VECS; i++) {
+//     a = random_input();
+//     b = random_input();
+//     r = a/b;
+//     output(fptr, a, b, r);
+//   }
+
+  fclose(fptr);
+  return 0;
+}
+
+/* Functions */
+
+/* Writes one test-vector line to fptr: a, b, r and rem, each formatted by
+   printhex(), separated by '_' and ended by a newline. */
+void output(FILE *fptr, long a, long b, long r, long rem)
+{
+  const long field[4] = {a, b, r, rem};
+  int k;
+
+  for (k = 0; k < 4; k++) {
+    printhex(fptr, field[k]);
+    // '_' between fields, newline after the last one
+    fprintf(fptr, "%s", (k < 3) ? "_" : "\n");
+  }
+}
+
+/* Writes m to fptr as 16 zero-padded lowercase hex digits.
+   The value is converted to unsigned long long so the argument type matches
+   the %llx conversion; passing a long directly is undefined behavior. */
+void printhex(FILE *fptr, long m)
+{
+    fprintf(fptr, "%016llx", (unsigned long long)m);
+}    
+
+/* Returns a pseudo-random value in [1.0, 2.0].
+   rand() is scaled by RAND_MAX rather than a fixed 32767 so the result
+   stays in range whatever the C library's RAND_MAX is. */
+double random_input(void)
+{
+  return 1.0 + rand()/(double)RAND_MAX;
+}
+  
--- a/pipelined/srt/lint-srt
+++ b/pipelined/srt/lint-srt
@ -0,0 +1,2 @@
+verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
+verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
--- a/pipelined/srt/qsel4.dat
+++ b/pipelined/srt/qsel4.dat
--- a/pipelined/srt/qsel4.sv
+++ b/pipelined/srt/qsel4.sv
--- a/pipelined/srt/qslc_r4a2.c
+++ b/pipelined/srt/qslc_r4a2.c
@ -0,0 +1,198 @@
+/*
+  Program:      qslc_r4a2.c
+  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
+  User:         James E. Stine
+
+*/
+
+#include <stdio.h>
+#include <math.h>
+
+#define DIVISOR_SIZE 3
+#define CARRY_SIZE 7
+#define SUM_SIZE 7
+#define TOT_SIZE 7
+
+void disp_binary(double, int, int);
+
+/* Table index pair walked by main(): both fields wrap around at their
+   bit-field width as they are incremented. */
+struct bits {
+  unsigned int divisor : DIVISOR_SIZE;  /* divisor index */
+  /* Explicitly signed: whether a plain `int` bit-field is signed is
+     implementation-defined, and main() compares tot against negative
+     thresholds. */
+  signed int tot : TOT_SIZE;
+} pla;
+
+/* 
+
+   Function:      disp_binary
+   Description:   Prints x to stdout as a fixed-point binary string of
+   bits_to_left + bits_to_right digits, with no radix point
+   and no newline. A negative x is printed as
+   2^bits_to_left + x. A value whose magnitude is below
+   2^-bits_to_right is printed as all zeros.
+   Argument List: double x            The value to be converted
+   int bits_to_left    Number of bits left of radix point
+   int bits_to_right   Number of bits right of radix point
+   Return value:  none
+
+*/
+void disp_binary(double x, int bits_to_left, int bits_to_right) {
+  int i; 
+  double weight;
+
+  /* Magnitude below one least-significant bit: every digit is zero. */
+  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
+    for (i = -bits_to_left + 1; i <= bits_to_right; i++)
+      printf("0");
+    return;
+  }
+
+  /* Fold a negative value into the non-negative range. */
+  if (x < 0.0) 
+    x += pow(2.0, (double) bits_to_left);
+
+  /* Peel off one digit per position, most significant first. */
+  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
+    weight = pow(2.0, (double) -i);
+    if (x < weight) 
+      printf("0");
+    else {
+      printf("1");
+      x -= weight;
+    }
+  }
+
+}
+
+/* Prints a SystemVerilog case statement to stdout with one entry per
+   (divisor, tot) pair, mapping it to a 4-bit quotient-digit code. */
+int main() {
+  /*
+    4 bits for Radix 4 (a=2)
+    1000 = +2
+    0100 = +1
+    0000 =  0
+    0010 = -1
+    0001 = -2
+    Row d holds the lower bounds on tot for the first four codes when the
+    divisor index is d; a tot below all four bounds selects the last code.
+    The divisor field is 3 bits wide, so every index has a row.
+  */
+  static const int threshold[8][4] = {
+    {12, 4, -4, -13},
+    {14, 4, -6, -15},
+    {15, 4, -6, -16},
+    {16, 4, -6, -18},
+    {18, 6, -8, -20},
+    {20, 6, -8, -20},
+    {20, 8, -8, -22},
+    {24, 8, -8, -24}
+  };
+  static const char *const code[5] = {"1000", "0100", "0000", "0010", "0001"};
+  int d, w, k;
+
+  pla.divisor = 0;
+  pla.tot = 0;
+  printf("\tcase({D[5:3],Wmsbs})\n");
+  for (d = 0; d < (1 << DIVISOR_SIZE); d++) {
+    for (w = 0; w < (1 << TOT_SIZE); w++) {
+      printf("\t\t10'b");
+      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
+      printf("_");
+      disp_binary((double) pla.tot, TOT_SIZE, 0);
+      printf(": q = 4'b");
+
+      /* first bound that tot reaches picks the code */
+      k = 0;
+      while (k < 4 && pla.tot < threshold[pla.divisor][k])
+	k++;
+      printf("%s", code[k]);
+
+      printf(";\n");
+      (pla.tot)++;      /* wraps at the bit-field width */
+    }
+    (pla.divisor)++;
+  }
+  printf("\tendcase\n");
+  return 0;
+}
--- a/pipelined/srt/qslc_r4a2b
+++ b/pipelined/srt/qslc_r4a2b
--- a/pipelined/srt/qslc_r4a2b.c
+++ b/pipelined/srt/qslc_r4a2b.c
@ -0,0 +1,190 @@
+/*
+  Program:      qslc_r4a2b.c
+  Description:  Prints out Quotient Selection Table as one hex digit per line (assumes CPA is utilized to reduce memory)
+  User:         James E. Stine
+
+*/
+
+#include <stdio.h>
+#include <math.h>
+
+#define DIVISOR_SIZE 3
+#define CARRY_SIZE 7
+#define SUM_SIZE 7
+#define TOT_SIZE 7
+
+void disp_binary(double, int, int);
+
+/* Table index pair walked by main(): both fields wrap around at their
+   bit-field width as they are incremented. */
+struct bits {
+  unsigned int divisor : DIVISOR_SIZE;  /* divisor index */
+  /* Explicitly signed: whether a plain `int` bit-field is signed is
+     implementation-defined, and main() compares tot against negative
+     thresholds. */
+  signed int tot : TOT_SIZE;
+} pla;
+
+/* 
+
+   Function:      disp_binary
+   Description:   Prints x to stdout as a fixed-point binary string of
+   bits_to_left + bits_to_right digits, with no radix point
+   and no newline. A negative x is printed as
+   2^bits_to_left + x. A value whose magnitude is below
+   2^-bits_to_right is printed as all zeros.
+   Argument List: double x            The value to be converted
+   int bits_to_left    Number of bits left of radix point
+   int bits_to_right   Number of bits right of radix point
+   Return value:  none
+
+*/
+void disp_binary(double x, int bits_to_left, int bits_to_right) {
+  int i; 
+  double weight;
+
+  /* Magnitude below one least-significant bit: every digit is zero. */
+  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
+    for (i = -bits_to_left + 1; i <= bits_to_right; i++)
+      printf("0");
+    return;
+  }
+
+  /* Fold a negative value into the non-negative range. */
+  if (x < 0.0) 
+    x += pow(2.0, (double) bits_to_left);
+
+  /* Peel off one digit per position, most significant first. */
+  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
+    weight = pow(2.0, (double) -i);
+    if (x < weight) 
+      printf("0");
+    else {
+      printf("1");
+      x -= weight;
+    }
+  }
+
+}
+
+/* Prints the quotient-digit code for every (divisor, tot) pair to stdout,
+   one hex digit per line, divisor-major. */
+int main() {
+  /*
+    4 bits for Radix 4 (a=2), printed as one hex digit
+    1000 = +2
+    0100 = +1
+    0000 =  0
+    0010 = -1
+    0001 = -2
+    Row d holds the lower bounds on tot for the first four codes when the
+    divisor index is d; a tot below all four bounds selects the last code.
+    The divisor field is 3 bits wide, so every index has a row.
+  */
+  static const int threshold[8][4] = {
+    {12, 4, -4, -13},
+    {14, 4, -6, -15},
+    {15, 4, -6, -16},
+    {16, 4, -6, -18},
+    {18, 6, -8, -20},
+    {20, 6, -8, -20},
+    {20, 8, -8, -22},
+    {24, 8, -8, -24}
+  };
+  static const char *const code[5] = {"8", "4", "0", "2", "1"};
+  int d, w, k;
+
+  pla.divisor = 0;
+  pla.tot = 0;
+  for (d = 0; d < (1 << DIVISOR_SIZE); d++) {
+    for (w = 0; w < (1 << TOT_SIZE); w++) {
+      /* first bound that tot reaches picks the code */
+      k = 0;
+      while (k < 4 && pla.tot < threshold[pla.divisor][k])
+	k++;
+      printf("%s", code[k]);
+
+      printf("\n");
+      (pla.tot)++;      /* wraps at the bit-field width */
+    }
+    (pla.divisor)++;
+  }
+  return 0;
+}
--- a/pipelined/srt/qslc_r4a2b.tv
+++ b/pipelined/srt/qslc_r4a2b.tv
--- a/pipelined/srt/qslc_sqrt_r4a2
+++ b/pipelined/srt/qslc_sqrt_r4a2
--- a/pipelined/srt/qslc_sqrt_r4a2.c
+++ b/pipelined/srt/qslc_sqrt_r4a2.c
@ -0,0 +1,198 @@
+/*
+  Program:      qslc_sqrt_r4a2.c
+  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
+  User:         James E. Stine
+
+*/
+
+#include <stdio.h>
+#include <math.h>
+
+#define DIVISOR_SIZE 3
+#define CARRY_SIZE 7
+#define SUM_SIZE 7
+#define TOT_SIZE 7
+
+void disp_binary(double, int, int);
+
+/* Table index pair walked by main(): both fields wrap around at their
+   bit-field width as they are incremented. */
+struct bits {
+  unsigned int divisor : DIVISOR_SIZE;  /* divisor index */
+  /* Explicitly signed: whether a plain `int` bit-field is signed is
+     implementation-defined, and main() compares tot against negative
+     thresholds. */
+  signed int tot : TOT_SIZE;
+} pla;
+
+/* 
+
+   Function:      disp_binary
+   Description:   Prints x to stdout as a fixed-point binary string of
+   bits_to_left + bits_to_right digits, with no radix point
+   and no newline. A negative x is printed as
+   2^bits_to_left + x. A value whose magnitude is below
+   2^-bits_to_right is printed as all zeros.
+   Argument List: double x            The value to be converted
+   int bits_to_left    Number of bits left of radix point
+   int bits_to_right   Number of bits right of radix point
+   Return value:  none
+
+*/
+void disp_binary(double x, int bits_to_left, int bits_to_right) {
+  int i; 
+  double weight;
+
+  /* Magnitude below one least-significant bit: every digit is zero. */
+  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
+    for (i = -bits_to_left + 1; i <= bits_to_right; i++)
+      printf("0");
+    return;
+  }
+
+  /* Fold a negative value into the non-negative range. */
+  if (x < 0.0) 
+    x += pow(2.0, (double) bits_to_left);
+
+  /* Peel off one digit per position, most significant first. */
+  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
+    weight = pow(2.0, (double) -i);
+    if (x < weight) 
+      printf("0");
+    else {
+      printf("1");
+      x -= weight;
+    }
+  }
+
+}
+
+/* Prints a SystemVerilog case statement to stdout with one entry per
+   (divisor, tot) pair, mapping it to a 4-bit quotient-digit code. */
+int main() {
+  /*
+    4 bits for Radix 4 (a=2)
+    1000 = +2
+    0100 = +1
+    0000 =  0
+    0010 = -1
+    0001 = -2
+    Row d holds the lower bounds on tot for the first four codes when the
+    divisor index is d; a tot below all four bounds selects the last code.
+    The divisor field is 3 bits wide, so every index has a row.
+  */
+  static const int threshold[8][4] = {
+    {24,  8,  -8, -26},
+    {28,  8, -10, -28},
+    {32,  8, -12, -32},
+    {32,  8, -12, -34},
+    {36, 12, -12, -36},
+    {40, 12, -16, -40},
+    {40, 16, -16, -44},
+    {44, 16, -16, -46}
+  };
+  static const char *const code[5] = {"1000", "0100", "0000", "0010", "0001"};
+  int d, w, k;
+
+  pla.divisor = 0;
+  pla.tot = 0;
+  printf("\tcase({D[5:3],Wmsbs})\n");
+  for (d = 0; d < (1 << DIVISOR_SIZE); d++) {
+    for (w = 0; w < (1 << TOT_SIZE); w++) {
+      /* NOTE(review): the literal is labelled 11 bits wide, but only
+	 DIVISOR_SIZE + TOT_SIZE = 10 digits are printed after it - confirm
+	 against the width of the case expression. */
+      printf("\t\t11'b");
+      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
+      printf("_");
+      disp_binary((double) pla.tot, TOT_SIZE, 0);
+      printf(": q = 4'b");
+
+      /* first bound that tot reaches picks the code */
+      k = 0;
+      while (k < 4 && pla.tot < threshold[pla.divisor][k])
+	k++;
+      printf("%s", code[k]);
+
+      printf(";\n");
+      (pla.tot)++;      /* wraps at the bit-field width */
+    }
+    (pla.divisor)++;
+  }
+  printf("\tendcase\n");
+  return 0;
+}
--- a/pipelined/srt/qslc_sqrt_r4a2.sv
+++ b/pipelined/srt/qslc_sqrt_r4a2.sv
--- a/pipelined/srt/sim-srt
+++ b/pipelined/srt/sim-srt
@ -0,0 +1,2 @@
+vsim -do "do srt.do"
+
--- a/pipelined/srt/sim-srt-batch
+++ b/pipelined/srt/sim-srt-batch
@ -0,0 +1 @@
+vsim -c -do "do srt.do"
--- a/pipelined/srt/sim-srt4
+++ b/pipelined/srt/sim-srt4
@ -0,0 +1,2 @@
+vsim -do "do srt-radix4.do"
+
--- a/pipelined/srt/sim-srt4-batch
+++ b/pipelined/srt/sim-srt4-batch
@ -0,0 +1 @@
+vsim -c -do "do srt-radix4.do"
--- a/pipelined/srt/sqrttestgen
+++ b/pipelined/srt/sqrttestgen
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@ -0,0 +1,96 @@
+/* sqrttestgen.c */
+
+/* Written 19 October 2021 David_Harris@hmc.edu
+
+   This program creates test vectors for mantissa component
+   of an IEEE floating point square root. 
+   */
+
+/* #includes */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Constants */
+
+#define ENTRIES  17
+#define RANDOM_VECS 500
+
+/* Prototypes */
+
+void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac);
+void printhex(FILE *fptr, double x);
+double random_input(void);
+
+/* Main */
+
+/* Writes one square-root test vector per table entry to the file
+   "sqrttestvectors": the biased exponent and mantissa of the input,
+   followed by the biased exponent and mantissa of its square root.
+   Returns 0 on success; exits with status 1 if the file cannot be opened. */
+int main(void)
+{
+  FILE *fptr;
+  double aFrac, rFrac;
+  int    aExp,  rExp;
+  int    e;
+  double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
+			  1.75, 1.875, 1.99999,
+			  1.1, 1.2, 1.01, 1.001, 1.0001,
+			  1/1.1, 1/1.5, 1/1.25, 1/1.125};
+  double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+        11, 12, 13, 14, 15, 16};
+  int i;
+  int bias = 1023;
+
+  if ((fptr = fopen("sqrttestvectors","w")) == NULL) {
+    fprintf(stderr, "Couldn't write sqrttestvectors file\n");
+    exit(1);
+  }
+
+  for (i=0; i<ENTRIES; i++) {
+    aFrac = mans[i];
+    aExp  = exps[i] + bias;
+    rFrac = sqrt(aFrac * pow(2, aExp - bias));
+    // frexp gives rFrac = f * 2^e with f in [0.5, 1), so the power-of-two
+    // exponent of rFrac is e - 1. Unlike log(rFrac)/log(2) this is exact
+    // and cannot land just below an integer through rounding.
+    (void) frexp(rFrac, &e);
+    rExp  = (e - 1) + bias;
+    output(fptr, aExp, aFrac, rExp, rFrac);
+  }
+  
+  // for (i = 0; i< RANDOM_VECS; i++) {
+  //   a = random_input();
+  //   r = sqrt(a);
+  //   output(fptr, a, r);
+  // }
+
+  fclose(fptr);
+  return 0;
+}
+
+/* Functions */
+
+/* Writes one test-vector line to fptr: the input and the result, each as
+   three hex digits of exponent followed by the mantissa digits from
+   printhex(), separated by '_' and ended by a newline. */
+void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
+{
+  const int    expo[2] = {aExp,  rExp};
+  const double frac[2] = {aFrac, rFrac};
+  int k;
+
+  for (k = 0; k < 2; k++) {
+    fprintf(fptr, "%03x", expo[k]);
+    printhex(fptr, frac[k]);
+    // '_' after the input, newline after the result
+    fprintf(fptr, "%s", (k == 0) ? "_" : "\n");
+  }
+}
+
+/* Rescales m by powers of two until it is no smaller than 1 and no larger
+   than 2, then writes the 52 bits after the binary point to fptr as 13 hex
+   digits. */
+void printhex(FILE *fptr, double m)
+{
+  int digit;
+
+  while (m<1) m *= 2;
+  while (m>2) m /= 2;
+  for (digit = 0; digit < 52/4; digit++) {
+    m -= floor(m);   // drop the integer part, keep the fraction
+    m *= 16;         // next four fraction bits move above the point
+    fprintf(fptr, "%x", (int)(m)%16);
+  }
+}    
+
+/* Returns a pseudo-random value in [1.0, 2.0].
+   rand() is scaled by RAND_MAX rather than a fixed 32767 so the result
+   stays in range whatever the C library's RAND_MAX is. */
+double random_input(void)
+{
+  return 1.0 + rand()/(double)RAND_MAX;
+}
+  
--- a/pipelined/srt/sqrttestvectors
+++ b/pipelined/srt/sqrttestvectors
@ -0,0 +1,517 @@
+0000000000000_0000000000000
+8000000000000_3988e1409212e
+4000000000000_1e3779b97f4a8
+2000000000000_0f876ccdf6cd9
+1000000000000_07e0f66afed07
+c000000000000_52a7fa9d2f8ea
+e000000000000_5e8add236a58f
+ffff583a53b8e_6a09ab16ee3d0
+199999999999a_0c7ebc96a56f6
+3333333333333_186f174f88472
+028f5c28f5c29_0146dd68287f3
+004189374bc6a_0020c2830b9c7
+00068db8bac71_000346d6ff116
+d1745d1745d17_e82c3f9d89e1c
+5555555555555_a20bd700c2c3e
+999999999999a_c9f25c5bfedd9
+c71c71c71c71c_e2b7dddfefa66
+ae3271fce3f9c_d551d18e54277
+93e045e88bd11_418bf3cc1e4c3
+90f7838f071e1_c5184e372ee71
+98d2536ca6d95_c982e901a1e14
+d2c916d22da46_e8decc85822fb
+94a0f921f243e_c728c4dbee1d3
+574b50dea1bd4_2873820e10e0c
+895a7660ecc1e_c0c5ced51afa7
+1c77322e645cd_0ddb946295434
+1ba62a7c54f8b_7d169e3a2659b
+e8e2978d2f1a6_61c59e7574d95
+41ffe2a7c54f9_9608c143bfd66
+7590faa9f553f_353eee44a1afa
+06e089a913522_6ede89bf49029
+e79076a8ed51e_f3a1feab3b7d6
+d51d2f4e5e9cc_ea168f50673ac
+45808ced19da3_983c902a22c03
+6f466990d321a_b1a42fd6b592a
+220ac945928b2_815be8939b369
+36c90d6e1adc3_8ee6afea03f82
+0b53a3a7474e9_059a20c9f6405
+f17a816502ca0_f8afe204e2600
+1917108e211c4_7b5d8ccee92ea
+9bc245b48b691_cb26e86f5735a
+40eb7926f24de_955a5577ffe7a
+9a985ff8bff18_4435dbe84773a
+09d9a6834d06a_70f0257a8ab67
+bdc1c7c38f872_51ceac06eed23
+ff7907ba0f742_ffbc7f69e3efc
+bf08f7f1efe3e_de6a83a26fd1d
+06a3206640cc8_6eb3ad01c9815
+ada579aef35de_d504e625a2d6d
+39b060f4c1e98_90c29e1123eb5
+2f2947da8fb52_16959cbd1d48c
+4671cd139a273_98d3bd2eff117
+0c7377beef7de_72bd0582548eb
+f975c46b88d71_67b81123f8dce
+f2157586eb0dd_f8fe755da5331
+2b8fdc2fb85f7_14ecfca93ae4c
+8af47b4cf699f_c1af76b04ddd5
+0db59ffb3ff68_739b3726df36e
+8a3739de73bce_c143ac24df9ed
+99dcbbd977b2f_43ebbe8469bbd
+c87c1d503aa07_e3723a3635fdf
+222386ff0dfe2_10890df0885f0
+68f1a9235246a_2ff9f6505d566
+9d934c9a99353_cc29e5f0e6998
+d690506ca0d94_ead84585b61ec
+1dc9f0d3e1a7c_0e7c1165efbfe
+e613feebfdd80_f2dee7435c007
+0d535dd6bbad7_73578043fac8f
+608ed9bdb37b6_2c6ca8cc4e6bb
+89b27d04fa09f_c0f802ca71172
+539721fa43f48_a0fa3fcb09adb
+c7d220f041e08_e3182e88ae49f
+654afb15f62bf_2e6f61bf98e0e
+06ea8f751eea4_036f615bb315f
+48515122a2454_21e9a04cd1f6b
+d4b76d06da0db_5a65d547598d0
+0282cd059a0b3_01409dbd6fa12
+e80b2216442c9_f3e0d516d00bd
+f0fcd6e9add36_f8701fac9a977
+cdf3d353a6a75_e6553654da734
+b3b1297a52f4a_d84eb4d0cdd1c
+1116f5a5eb4bd_086824801e0f1
+145fa05f40be8_782b4f7607e38
+803d426284c51_39a1e28fda198
+85445d08ba117_be6f8226d6c13
+066f15de2bbc5_6e8f568cc6f8d
+55e48f491e924_a26383073cc76
+1032d851b0a36_7551305922e9c
+41eb5d56baad7_95fbd0e6c36fd
+c02b923f247e5_52b8721f6429d
+dd1f363e6c7ce_ee40ded8c9bdb
+dc98d325a64b5_edfb3ec1fe213
+71258ca319463_b2beaf8de715d
+232415902b205_11017231bc63a
+7a261fb83f708_b803901d08750
+47afdd07ba0f7_999ab6f13db2a
+6a9291cd239a4_30a9519b120a8
+60341ea03d408_a8a695fe1273c
+53f4eb59d6b3b_a133d02e5f0b1
+c2b2483c90792_53ac7d5cd5d67
+c29ebde17bc2f_e054a2134123f
+a8925cd0b9a17_d23db60b50520
+524a8a0d141a3_264878d4d966a
+d4f8ed91db23b_ea039e961a422
+c96daec35d86c_e3f2144d791a1
+66cb9f1f3e3e8_2f1200c8b758c
+5f97f1e7e3cfc_a848653bfa858
+e9bc086410c82_f4be56d9e1746
+2d66981930326_88d50f4e55eaf
+508ec8c991932_9f1c8c264d1f5
+b7a9dbafb75f7_da74869225afe
+c2238317062e1_5376ac34cb03c
+d918439c87391_ec29c1ab0b399
+9806f815f02be_4331e7926a75f
+a12e62c0c5819_ce2a519890e29
+5e56a0a141428_a786593154104
+d27091e923d24_e8b06fcba35cc
+ee19e403c8079_63a777df9bd21
+ba0de3abc7579_dbbe4f307b7a9
+e68d47be8f7d2_f31d205d919e7
+d720bb25764af_eb2391d186941
+2e72bc85790af_8983a68b1933f
+c3201ae035c07_e0998f5edcc08
+484047c08f812_99f4ef763a198
+ba5973dae7b5d_5083801deb09a
+3d403a907520f_930773446aea4
+1fd4498093012_0f72d0c56b2e7
+927402d405a81_c5ef16b504e39
+3adcb25164a2d_1be8dfa703db9
+c9cabf357e6b0_565651a123f9d
+cf1c9ba937527_e6f156560fab1
+801c5c08b8117_bb780dcd4a3ce
+1ae378c6f18de_0d1bd1404d89c
+1cea6bf4d7e9b_7df032936ca73
+aa86a4cd499a9_4a70a8d0586dd
+5b65ac7b58f6b_a5be43f803917
+d04e3b847708f_e791e8d64ca05
+a6aa223444689_48f0e09b7504f
+023dcdfb9bf73_6b9ec1c492343
+fb5da72b4e56a_68658275b9f12
+85620d141a283_be8087eda1701
+f8374f2e9e5d4_fc17d6b6aa491
+debe95252a4a5_ef17d49382367
+5e7450a8a1514_a7984aa86726d
+886996632cc66_3cf350a8e3f14
+7fe6f7ddefbbe_bb593a8c74da0
+7989e283c5079_36e2b9b0780e8
+2d3eba2d745af_15b3dccad59d9
+dbb7a75f4ebea_ed867fc2e2d84
+2b466a2cd459b_8771cd81d47f3
+f49a9335266a5_fa45142e25067
+382293d527aa5_8fc4312e812d9
+76e1195232a46_b61b865625966
+0102fe95fd2c0_6ac0db2f8bcba
+9646ecb5d96bb_c815d9b329126
+501f4cde99bd3_9ed7c5d5bc785
+7d7efa39f473f_b9f4fb5c3d080
+31588b9117223_17961d26f5102
+e679d60bac176_f3132728a8d37
+ae7535aa6b54d_d5763b26476cf
+0b0e710ce219c_71c62b418032b
+68ebf3b7e76fd_addfd161ac4b7
+ebb1a9835306a_f5be89408b278
+987e2d705ae0b_c953d0c9914d0
+772368e2d1c5a_b64243fae3fb7
+502dbcc7798ef_9ee0ae7d41d9b
+ef55989331266_f799268f564e9
+476ba46348c69_9970116fd2787
+8501011202240_be48e041c087a
+7ef86050c0a18_391d2f0629239
+1451dfc3bf878_7821f369d1226
+0a5d0e1e1c3c4_714b482f78206
+4082d985b30b6_1e71f84b709d7
+1e686870d0e1a_0ec7049bce04d
+a32d7afaf5f5f_cf4515600a0db
+d8864ccc99993_5bcd565a71793
+a3b204ec09d81_cf8e4d0f9e74a
+258e5004a0094_83afadcd1ef88
+82e0837d06fa1_bd101d541955f
+a99eebfdd7fbb_d2d1141d12617
+433ae8a5d14ba_96cf2f2b9b8c3
+00029425284a5_00014a11bf5c4
+a2824ed49da94_cee674f907509
+c7add37ba6f75_e304f163dffc6
+00ff9faf3f5e8_6abe7a37761f6
+fee5806f00de0_ff72acb649dc2
+bafa6ab4d569b_dc3d85026c40a
+bdbce225c44b9_ddb8afc7bfccb
+28548ec91d924_136d8e62015c4
+11be577caef96_08b91fd0554ab
+d8a235806b00d_ebec55b8bf00e
+301702e605cc1_8a94b08c2982f
+7171e683cd07a_b2eba2d7a9729
+4e227f64feca0_9d9d5317da9b6
+911e609cc1398_407278920f577
+d37de283c5079_e93d701b76ce6
+a3b45ca4b9497_cf8f9841cb9d6
+6fbd91b323664_32d331f930e2e
+1ae386df0dbe2_7c93c78e97a2c
+28af5daebb5d7_85bf3c0e14efe
+cf77a9c7538ea_5873e435c4655
+5ff5fed3fda80_a88120c300c87
+98335e26bc4d8_43437c938880b
+0fca30186030c_750964d64ec9c
+83c10be217c43_bd91314e9c2f4
+379f95072a0e5_1a721a6753344
+fc0b567eacfd6_fe04afe9c2350
+276d10ca21944_84eb4f9969281
+c15c1a8835107_dfa88eb80f3a5
+7ee5d9f3b3e76_bac4aa8497839
+4204ff89ff140_960bfa7d01fdf
+224d4ada95b53_818814678ee18
+7ee50236046c1_bac42dc7ca58c
+aaa3933f267e5_d35fef27b94ef
+d9d0a8295052a_ec89a1e80c752
+bf22ea21d443b_de7865a94a4bb
+a98313a6274c5_d2c1ceb7337ca
+f57e4ed89db14_fab82ed7ff119
+7cd9cc3f987f3_b995432eff078
+ce8ba57f4afe9_e6a51a7901e9d
+f79fcb87970f3_fbcb7a062af68
+557601bc03780_a21fd8a725b25
+fd3950baa1754_690e4b24fda4d
+4fd9148229045_9eac6e56877bb
+a8fca195432a8_d2780bc6b98a9
+c729d8afb15f6_e2bef95620f17
+3b713d027a04f_1c2bd001532f4
+4caafe31fc640_9cb4a80c2fa32
+fc0a86050c0a2_fe04474537bc8
+424fae7f5cfec_963b0dac95fa2
+d5516f62dec5c_ea31d99df7240
+66bc7668ecd1e_ac921f868adac
+447f5a96b52d7_2038947b4b29b
+50b18f3b1e764_25965fa129e26
+da0299e533ca6_eca396b34a8cd
+2fd5c94b92972_16e4d4254bac7
+44269225244a5_976360c639740
+ca17f3bfe77fd_e44c1dd968501
+4d65c9f393e72_9d28763d5cfbd
+057caca5594ab_6de5e38acee76
+f169d6e3adc76_f8a76dc8df97f
+1ec992eb25d65_0ef4ef3449518
+179f815302a60_7a5fc96aa31be
+7048d911b2236_b23ca645e8430
+d012e565cacb9_58ad8ec8be73b
+e2d452f0a5e15_5f92f5ffaefd3
+2f65c32f865f1_8a21a078f2055
+e3837056e0adc_f18d8a27380ab
+cdc4f091e123c_57d232be8a40f
+58731ece3d9c8_28f31f19298aa
+b20915ce2b9c5_d768a11d16e12
+bcf887910f222_518261ad16d10
+a9911b2236447_d2c98074dcff2
+613f7c4ef89df_2cb7e160d7c89
+327088ed11da2_8c1a3372f503b
+aaa432b46568d_d360467f228ce
+df0e5b3cb6797_ef411299da1bf
+59dcb08d611ac_a4cf75540e2c1
+ee9261bcc3798_63d2d29caeecf
+ed8870a0e141c_63731aff23c30
+384cbe097c130_1ac088bb0dd3f
+6c9b8cbb19763_b01053166e905
+75eabac5758af_b58b788c3d84c
+3e640c5418a83_93c0a50ff06df
+4ee5450a8a151_9e15ce99d389c
+a7f2aa5554aab_49709f0acf22a
+645cc57d8afb1_ab2686a652109
+5912e675cceba_a454a133a1a93
+467d521aa4355_98daf3bf22c37
+63791ed23da48_2da9f6a2bb9b8
+7a264c5c98b93_b803aa160a737
+37e128f651eca_8f9a4ab9a1e87
+411f58deb1bd6_957b16ee69083
+7627146a28d45_b5aec6723866d
+5048040808101_25685807290de
+7b1fb58b6b16d_b894ad98eaf3c
+9e2cf769eed3e_cc7f5b1f41ba4
+d787c5338a671_eb59441cd889b
+5e9d25da4bb49_a7b0f75669cbe
+4e6bb6236c46e_9dcaa120e794c
+0776a925524aa_03b4778fccb7b
+10a03ba47748f_759c2bedefe31
+670fcf3f9e7f4_2f2ecbe910302
+0ac5cb6396c73_0554b03e81ada
+c13df9cbf397e_df987a0fb187c
+5f5082d505aa1_a81d4926c24ed
+8008c2f185e31_398c74e8df332
+c7dc849909321_559d54cc2b0c4
+724e66b4cd69a_33e4bbc30c9eb
+2748493c92792_84d31934513a7
+2fbdc8e391c72_8a5ac8a2e8886
+554b925f24be5_a205dcaa3c364
+27bbabdf57beb_1326782142cac
+8d367798ef31e_c2f808c87d9f1
+27533d567aacf_12f5deec64f17
+51945488a9115_25f92d6ea26b2
+84a732ee65dcd_be155990ed3be
+f8046428c8519_fbfe2e707160d
+d710b2216442d_eb1b360f3527a
+a7a51eda3db48_d1bb607f46386
+5375e713ce27a_26ca91c1f0ce8
+817a8f251e4a4_bc42086f3f1e3
+bf008f911f224_524760d1e709e
+ead84c6898d13_f54f95bc682e5
+d683b837706ee_ead1b3e4bde66
+87a249cc93992_bfca701ca531f
+65f201f003e00_ac191f863ce79
+f047b04f609ec_646fffc092bd4
+5f240ef81df04_a802734b8a7f3
+89124f849f094_3d376cd3db8df
+8c72ac315862b_c288d5b00e9a9
+e2dd638ec71d9_f1381345d33f3
+d5276366c6cd9_ea1be3b9eb936
+b952a6194c32a_db598334d83a2
+a0e924e249c49_ce03f563fdc69
+965f5d2aba557_c823913ba08d9
+189d28e651cca_7b0b409c0a17a
+3072c32d865b1_172cd6665d696
+cf58edbddb7bb_e7110ba7fe74a
+d1c25c40b8817_e85524c53f258
+bf7faad355a6b_deaa03698b005
+fe216d3ada75b_69608b485dc01
+2707ee97dd2fc_84a8b81452ea1
+4d5e02fc05f81_9d23a492cab16
+18825c5cb8b97_0bf97c45ddb75
+baaf8d471a8e3_dc15450d64a61
+f83f5606ac0d6_674989f2b429b
+dad1f5d3eba7d_5ca55963e3317
+b2adf167e2cfc_d7c21f6b1ca69
+fbe9776aeed5e_6897272bfb5cc
+2a10337866f0d_143b6e39b27f8
+718b93b7276e5_b2fabfa759d04
+5e515f82bf058_a7832c1f42b4f
+74866188c3118_b4bac8738fa28
+1cd4c5398a731_7de1af32f25fe
+34cf17b22f646_8da17681a4b5c
+f845569aad356_674bad5d4924d
+ad25333e667cd_d4bedd0aecd3c
+72d60dbc1b783_b3bd135e57267
+6d02e905d20ba_b04d8c9bc773b
+c6a16bf2d7e5b_e2769b569a79e
+fe8573cae795d_6983f76829169
+f0c0ee41dc83c_649b871bad319
+1b26f96df2dbe_7cc1236af05ca
+711ebfed7fdb0_3366654dceb41
+4ca0ce019c034_23cf3daf6a36c
+dd271b8a37147_ee44f5c421b0b
+d12a88dd11ba2_e80586b7805d9
+3e87c897912f2_93d74ce63be2d
+ac7a094012802_d461590559b4f
+a2e6e515ca2b9_cf1e126c2a7b4
+fc02e6c5cd8ba_68a02e6a93e0d
+57097faaff560_285715a40378d
+93d1a7534ea6a_418621e995c81
+5156f421e843d_9f97ebad5261b
+37c3ee03dc07c_8f8790704d084
+0924c1d183a30_04881c489753d
+35c27c74f8e9f_8e3e0ae532ece
+2937474e8e9d2_13d6d7821da0f
+b9d29245248a5_50502e27b8267
+4b635ad6b5ad7_9be93a4cb06c1
+745a054c0a981_34be1df77fe08
+27366e6cdcd9c_12e8756a6f38e
+535d74a2e945d_26bff41decc57
+756b2136426c8_352f434ee70b5
+b59cd111a2234_4eb501f2ec4eb
+de7bd203a4075_eef54dfc17c7e
+2ace0f781ef04_87230fc21ac9f
+0fdb377e6efce_07cf1e235f818
+514bdfb7bf6f8_9f91186d6fd41
+860cc9b993732_bee2615ddfe41
+f327ed77daefb_657801de5581d
+42226664ccc9a_961e8383f1d72
+c422e781cf03a_e1235641b538d
+405bb51b6a36d_94ff7fc057495
+0915f98ff31fe_706845cad2ee6
+a9b539ea73d4e_d2dd4f3f72272
+1d7cd09da13b4_7e52454aa77ff
+b47504d209a41_4e43d00a88125
+a8f317122e246_d272cf495d24d
+93e1b3ab6756d_418c8565c5fc9
+f46d9ddb3bb67_fa2e57dc465a1
+4ddd14da29b45_2459d84009dd8
+757399af335e6_b545bfaaeaa5c
+47023b207640f_992e23a866582
+f8437e92fd260_fc1dfa5d8e2c1
+5a630f561eac4_29c898a1bc51b
+cb929c0938127_e5141d2922f10
+16b876a0ed41e_79c357aa29d9a
+a4ace129c2538_482a7a94cd950
+637726ee4ddca_2da920d3c03e9
+910f787ef0fde_c525d7242a063
+8c81733ee67dd_c2913b3167ea7
+2cc04b0896113_15798fdab384d
+d5c196432c866_5ac821396e6b4
+bb2cd765aecb6_dc589f396a3db
+712c8e0d1c1a4_b2c2cf96b0ce5
+92e04c4098813_c62c2457074fe
+cbe9cdab9b573_5721221a0bf90
+f91829d053a0a_fc8914b7cdd21
+6a77e47fc8ff9_aecb5b85be439
+afeec21d843b1_d643ef2b90f14
+e54c410882110_6078bd1f8333d
+bafe7e18fc320_dc3fb5df72e0c
+80c4c00980130_39d929a387cc1
+97deacbd597ab_4321f1d40e697
+881cef41de83c_3cd45a5f9ee1b
+3507ccf799ef3_1944a8caee28e
+5076a6354c6aa_9f0da98ac6c59
+9e46fa09f413f_cc8dd08fb8be8
+0dc3f617ec2fe_73a5175d66c26
+a618007000e00_d0e0e43954673
+b5c617cc2f986_d96f38a1e24ed
+d4bb0fee1fdc4_5a672d42d6981
+8fd319b6336c6_c472f924b09f7
+625c9c9d393a7_a9f3256ef10d4
+c5360d041a083_549ebeb643612
+e7f50aca15943_616fa109cde0f
+2de938a27144e_892a268a2acc0
+2847242a48549_857ac0a5f747c
+0f5311f223e44_74b79cf898d09
+30ae26f04de0a_8af6b35ff6236
+72a1394a7294e_3407285970c2c
+379d0a5214a43_1a70f382c5425
+c70c4c7498e93_e2af4dc60c104
+e80d9edb3db68_617887a7722a9
+5bd5721ae435d_a602171c5580b
+3832da7db4fb7_8fce9d43fd697
+0d32d435a86b5_73411135aca74
+3ce8436086c11_92cf8f6ebc849
+31450446088c1_8b58701eb7b1e
+de92e2b5c56b9_ef013bd7260e7
+a8e1f3e7e7cfd_d26967647c8c8
+1f2926124c24a_0f2211bad7fca
+94e9b5f36be6d_41f58c8807554
+d8f88bed17da3_ec1941ea35d9c
+5b7be977d2efa_a5cbc36bac10c
+ed535cbeb97d7_635ffd630a85b
+cdd9c98393072_e6478154d8edf
+a753a09341268_d18e92ec81d15
+29d550caa1954_14202398e8fff
+6dcadfd5bfab8_3202e02068f82
+c1b3209a41348_dfd6fe91e15e3
+608a13c427885_2c6aa026e29ec
+a140f7e1efc3e_ce349c86121be
+51803a487490f_9fb15781c905b
+29ba6774cee9a_866e7b273cac2
+07abfcb7f96ff_03cebea460554
+8e7cdba5b74b7_c3b12ac430dbc
+4082df19be338_9518407191a9a
+ac1b2286450c9_d42d7783a38fe
+3b93db1fb63f7_1c3b675d40b07
+c4ac180830106_e16c4f5d7d3e5
+9596fc8df91bf_c7b30e07afbd6
+399672b8e571d_90b20d5068710
+c18a9395272a5_533cfc9f3fe5c
+596a8e951d2a4_a48a00d82c458
+9089cf3f9e7f4_c4da4e411869b
+70622cf059e0b_b24b94843b053
+9a5d8b0b16163_ca5fd85efc4f9
+9ad825304a609_444f08369b06e
+4342625cc4b99_1fabad9842b85
+bc7436e06dc0e_51502fc43175e
+d92019f033e06_ec2dd53f69949
+8b08abe157c2b_3e01e15ae4182
+10d5522ea45d5_75c08a7bf9165
+3cea498493092_92d0d8ba07bc4
+1c61504ea09d4_7d94416e09d5b
+de40218043008_eed66db4947dd
+855e098413082_3bb7e1b37331c
+875de1bbc3778_3c872161079b4
+38c351d6a3ad4_1af6318279595
+799274ace959d_b7ad9deab7b4b
+0ab822e045c09_054dff9072a02
+4ca4b2cd659ad_9cb0c07cc188c
+1acd6c86d90db_7c84e91c740a7
+d6da4bbc97793_eafed9ccde54b
+5f3e5948b2916_a812527e18662
+9ea16ae2d5c5b_ccc013621ca2b
+6551275a4eb4a_abb8e42c4fd71
+3f7670b4e169c_1df9ee59883b6
+4ab68d651aca3_9b7dc70153f53
+8a8a7114e229c_3dcf0e6129473
+10e43e607cc10_75cac2f8b142b
+b01a4c8899113_d65ba26339e53
+3ca9879b0f362_92a7aeed082a3
+491390d321a64_9a78ca81ce7d7
+097edb15b62b7_70b11fde081ea
+9a5aada55b4ab_441d7f4eb8f26
+72df7b76f6edf_3421065197feb
+70143dd87bb0f_b21da1b300cb4
+9a97696ed2dda_ca8029002c6f3
+5b0eeee9ddd3c_a58999cd748d9
+39216502ca059_90673d90505b0
+0d96fe2dfc5c0_73861c5399359
+046a26944d28a_0232a8f504ee4
+5f1bab6f56deb_a7fd62ab6c98d
+8048091012202_bb9143863c355
+3cec95252a4a5_92d24e2d35070
+6692abf957f2b_ac792841ad4d7
+22bf07060e0c2_81d392ae8a6a9
+2b0bf617ec2fe_14affea7c72e7
+772a6e54dca9c_35e839db8616e
+3896f75deebbe_1ae22082f4d3d
+0bdf2ed65dacc_05de5efbd3b35
+4f9107ae0f5c2_9e7ff0fac94aa
+b716e835d06ba_da2535bdda60b
+730f8c7f18fe3_b3ded974083fa
+5d92c1dd83bb0_2b266a35dbe6c
+55b25314a6295_a244c51c2b5ba
+c2130a7a14f43_e00a279d14229
+4e82994932926_9dd8c9c26b402
+40c5c0e381c70_1e8fdc3761c6f
+c5261f5c3eb88_e1ad2eb83b960
+7d835e86bd0d8_b9f786983a82c
+41dcb3c96792d_95f291eddaca8
+01c9a6ef4ddea_6b4cf2282cb85
--- a/pipelined/srt/srt-radix4.do
+++ b/pipelined/srt/srt-radix4.do
@ -0,0 +1,31 @@
+# srt-radix4.do
+#
+# David_Harris@hmc.edu 19 October 2021
+
+# Use this srt-radix4.do file to compile and simulate the radix-4 SRT testbench.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do srt-radix4.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do srt-radix4.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile the design, its testbench and the shared generic cells,
+# then optimize the testbenchradix4 top level and load it
+vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
+vopt +acc work.testbenchradix4 -o workopt 
+vsim workopt
+
+# NOTE(review): the "--" lines below are not Tcl "#" comments; confirm the simulator tolerates them
+-- display input and output signals as hexidecimal values
+add wave /testbenchradix4/*
+add wave /testbenchradix4/srtradix4/*
+add wave /testbenchradix4/srtradix4/qsel4/*
+add wave /testbenchradix4/srtradix4/otfc4/*
+
+-- Run the Simulation 
+run -all
--- a/pipelined/srt/srt-waves.do
+++ b/pipelined/srt/srt-waves.do
@ -0,0 +1,5 @@
+# srt-waves.do
+#
+# Adds the testbench signals and the signals of the srt instance and its
+# otfc2, preproc and divcounter sub-instances to the wave window.
+add wave -noupdate /testbench/*
+add wave -noupdate /testbench/srt/*
+add wave -noupdate /testbench/srt/otfc2/*
+add wave -noupdate /testbench/srt/preproc/*
+add wave -noupdate /testbench/srt/divcounter/*
--- a/pipelined/srt/srt.do
+++ b/pipelined/srt/srt.do
@ -0,0 +1,28 @@
+# srt.do   
+#
+# David_Harris@hmc.edu 19 October 2021
+
+# Use this srt.do file to compile and simulate the SRT testbench.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do srt.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do srt.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile the design, its testbench and the shared generic cells,
+# then optimize the testbench top level and load it
+vlog +incdir+../config/rv64gc +incdir+../config/shared srt.sv testbench.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
+vopt +acc work.testbench -o workopt 
+vsim workopt
+
+# NOTE(review): the "--" lines below are not Tcl "#" comments; confirm the simulator tolerates them
+-- display input and output signals as hexidecimal values
+do ./srt-waves.do
+
+-- Run the Simulation 
+run -all
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@ -0,0 +1,318 @@
+///////////////////////////////////////////
+// srt.sv
+//
+// Written: David_Harris@hmc.edu 13 January 2022
+// Modified: cturek@hmc.edu June 2022
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
+`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
+
+// srt: top level of the radix-2 SRT divide / square-root unit.
+//
+// When Start is asserted the preprocessed operands are loaded: X into the
+// sum half of the carry-save partial remainder (WS), zero into the carry
+// half (WC) and the divisor into D.  On every other cycle a quotient digit
+// (qp, qz or qm) is selected from the top four bits of WS and WC, the
+// matching divisor multiple is added in the carry-save adder, and the
+// result is shifted left by one bit for the next iteration.  The iteration
+// counter raises done; otfc2 accumulates the digits into Quot.
+//
+// NOTE(review): Rem and Flags are declared but not driven in this module,
+// and Stall, Flush, intExp and intSign are not used here.
+module srt (
+  input  logic clk,
+  input  logic Start, 
+  input  logic Stall, // *** multiple pipe stages
+  input  logic Flush, // *** multiple pipe stages
+  // Floating Point Inputs
+  // later add exponents, signs, special cases
+  input  logic       XSign, YSign,
+  input  logic [`NE-1:0] XExp, YExp,
+  input  logic [`NF-1:0] SrcXFrac, SrcYFrac,
+  input  logic [`XLEN-1:0] SrcA, SrcB,
+  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
+  input  logic       W64, // 32-bit ints on XLEN=64
+  input  logic       Signed, // Interpret integers as signed 2's complement
+  input  logic       Int, // Choose integer inputs
+  input  logic       Sqrt, // perform square root, not divide
+  output logic       rsign, done,
+  output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
+  output logic [`NE-1:0] rExp,
+  output logic [3:0] Flags
+);
+
+  logic           qp, qz, qm; // quotient is +1, 0, or -1
+  logic [`NE-1:0] calcExp;
+  logic           calcSign;
+  logic [`DIVLEN+3:0]  X, Dpreproc;
+  logic [`DIVLEN+3:0]  WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
+  logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
+  logic           intSign;
+ 
+  srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
+
+  // Top Muxes and Registers
+  // When start is asserted, the inputs are loaded into the divider.
+  // Otherwise, the divisor is retained and the partial remainder
+  // is fed back for the next iteration.
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+2:0], 1'b0}, X, Start, WSN);
+  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
+  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+2:0], 1'b0}, {(`DIVLEN+4){1'b0}}, Start, WCN);
+  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
+  flopen #(`DIVLEN+4) dflop(clk, Start, Dpreproc, D);
+
+  // Quotient Selection logic
+  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, qm)
+  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
+
+  // Result exponent, sign and iteration count are captured on Start
+  flopen #(`NE) expflop(clk, Start, calcExp, rExp);
+  flopen #(1) signflop(clk, Start, calcSign, rsign);
+  // Register width follows the declared width of dur / calcDur
+  // (7 bits when XLEN is 64) instead of a fixed literal.
+  flopen #($clog2(`XLEN+1)) durflop(clk, Start, calcDur, dur);
+  
+  counter divcounter(clk, Start, dur, done);
+
+  // Divisor Selection logic
+  // qp selects ~D (the +1 that completes the two's complement enters the
+  // csa through cin), qz selects zero, otherwise D is selected.
+  assign Db = ~D;
+  mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
+
+  // Partial Product Generation
+  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
+  
+  otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
+
+  expcalc expcalc(.XExp, .YExp, .calcExp);
+
+  signcalc signcalc(.XSign, .YSign, .calcSign);
+endmodule
+
+////////////////
+// Submodules //
+////////////////
+
+///////////////////
+// Preprocessing //
+///////////////////
+// srtpreproc: builds the initial partial remainder X and divisor D.
+//
+// Integer operands are made non-negative when Signed is set, then shifted
+// left by their leading-zero count plus one.  Floating-point fractions are
+// padded on the right with EXTRAFRACBITS zeros.  Int chooses between the
+// two sources.  dur is the iteration count handed to the counter.
+//
+// NOTE(review): Fmt and W64 are ports but are not used in this module.
+module srtpreproc (
+  input  logic [`XLEN-1:0] SrcA, SrcB,
+  input  logic [`NF-1:0] SrcXFrac, SrcYFrac,
+  input  logic [`NE-1:0] XExp,
+  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
+  input  logic       W64, // 32-bit ints on XLEN=64
+  input  logic       Signed, // Interpret integers as signed 2's complement
+  input  logic       Int, // Choose integer inputs
+  input  logic       Sqrt, // perform square root, not divide
+  output logic [`DIVLEN+3:0] X, D,
+  output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent
+  output logic       intSign // Quotient integer sign
+);
+
+  // Width of the leading-zero counts, intExp and dur (7 when XLEN is 64)
+  localparam CNTW = $clog2(`XLEN+1);
+
+  logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
+  logic  [`XLEN-1:0] PosA, PosB;
+  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX;
+
+  // Negate an operand only when it is signed and its top bit is set
+  assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
+  assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
+
+  // lzc is defined elsewhere; presumably a leading-zero counter
+  lzc #(`XLEN) lzcA (PosA, zeroCntA);
+  lzc #(`XLEN) lzcB (PosB, zeroCntB);
+
+  assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
+  assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
+
+  assign PreprocA = ExtraA << (zeroCntA + 1);
+  assign PreprocB = ExtraB << (zeroCntB + 1);
+  assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
+  assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
+
+  assign DivX = Int ? PreprocA : PreprocX;
+  // NOTE(review): SqrtX is declared DIVLEN bits wide but is assigned a
+  // (4 + NF)-bit concatenation, and is then used as the full X below;
+  // confirm the intended widths.
+  assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac};
+
+  assign X = Sqrt ? SqrtX : {4'b0001, DivX};
+  assign D = {4'b0001, Int ? PreprocB : PreprocY};
+  assign intExp = zeroCntB - zeroCntA + 1;
+  assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
+
+  // For integers, force dur to zero when the top bit of intExp is set.
+  // The mask is sized from CNTW so it tracks the declared width of intExp.
+  assign dur = Int ? (intExp & {CNTW{~intExp[CNTW-1]}}) : (`DIVLEN + 2);
+endmodule
+
+/////////////////////////////////
+// Quotient Selection, Radix 2 //
+/////////////////////////////////
+// qsel2: radix-2 quotient digit selection.
+// ps and pc are the top four bits of the sum and carry halves of the
+// carry-save partial remainder.  Exactly one of qp, qz, qm is asserted.
+module qsel2 ( // *** eventually just change to 4 bits
+  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
+  output logic         qp, qz, qm
+);
+ 
+  logic [`DIVLEN+3:`DIVLEN]  p, g;
+  logic          magnitude, sign, cout;
+
+  // The quotient selection logic is presented for simplicity, not
+  // for efficiency.  You can probably optimize your logic to
+  // select the proper divisor with less delay.
+
+  // Quotient equations from EE371 lecture notes 13-20
+  assign p = ps ^ pc; // per-bit XOR of the two halves
+  assign g = ps & pc; // per-bit AND of the two halves
+
+  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); // low unless the lower three p bits are all set
+  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); // carry out of the lower three bits of ps + pc
+  assign #1 sign = p[`DIVLEN+3] ^ cout; // top bit of the four-bit sum ps + pc
+/*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
+			  (ps[52]^pc[52]));
+  assign #1 sign = (ps[55]^pc[55])^
+      (ps[54] & pc[54] | ((ps[54]^pc[54]) &
+			    (ps[53]&pc[53] | ((ps[53]^pc[53]) &
+						(ps[52]&pc[52]))))); */
+
+  // Produce quotient = +1, 0, or -1
+  assign #1 qp = magnitude & ~sign;
+  assign #1 qz = ~magnitude;
+  assign #1 qm = magnitude & sign;
+endmodule
+
+///////////////////////////////////
+// On-The-Fly Converter, Radix 2 //
+///////////////////////////////////
+// otfc2: radix-2 on-the-fly quotient converter.
+// Start clears or presets the two running registers; afterwards one digit
+// (qp, qz or qm) is appended per clock.  r is an N-bit window of Q chosen
+// by the top bit of Q.
+module otfc2 #(parameter N=64) (
+  input  logic         clk,
+  input  logic         Start,
+  input  logic         qp, qz, qm,
+  output logic [N-1:0] r
+);
+
+  //  The on-the-fly converter transfers the quotient 
+  //  bits to the quotient as they come. 
+  //
+  //  This code follows the pseudocode presented in the 
+  //  floating point chapter of the book. Right now, 
+  //  it is written for Radix-2 division.
+  //
+  //  QM is Q-1. It allows us to write negative bits 
+  //  without using a costly CPA. 
+  logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
+  //  QR and QMR are the shifted versions of Q and QM.
+  //  They are treated as [N-1:r] size signals, and 
+  //  discard the r most significant bits of Q and QM. 
+  logic [N+1:0] QR, QMR;
+
+  // Q is held in a flopr with Start on its reset input (flopr is defined
+  // elsewhere); QM is loaded with all ones when Start is high.  Both are
+  // sized from the module parameter N so they always match the N+3 bit
+  // signals declared above.
+  flopr #(N+3) Qreg(clk, Start, QNext, Q);
+  mux2 #(N+3) QMmux(QMNext, {(N+3){1'b1}}, Start, QMMux);
+  flop #(N+3) QMreg(clk, QMMux, QM);
+
+  always_comb begin
+    QR  = Q[N+1:0];
+    QMR = QM[N+1:0];     // Shift Q and QM
+    if (qp) begin
+      QNext  = {QR,  1'b1};
+      QMNext = {QR,  1'b0};
+    end else if (qz) begin
+      QNext  = {QR,  1'b0};
+      QMNext = {QMR, 1'b1};
+    end else begin        // If qp and qz are not true, then qm is
+      QNext  = {QMR, 1'b1};
+      QMNext = {QMR, 1'b0};
+    end 
+  end
+  // Take the upper window when the top bit of Q is set, else one bit lower
+  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
+
+endmodule
+
+/////////////
+// counter //
+/////////////
+// counter: iteration counter for the divider.
+// req restarts the count at zero; otherwise count increments every clock.
+// done is set on the clock edge where count equals dur and is cleared on a
+// later edge where done or req is high.
+// NOTE(review): count and done have no reset other than req; confirm the
+// testbench always asserts req before relying on done.
+module counter(input  logic clk, 
+               input  logic req, 
+               input  logic [$clog2(`XLEN+1)-1:0] dur,
+               output logic done);
+ 
+   logic    [$clog2(`XLEN+1)-1:0]  count;
+
+  // This block of control logic sequences the divider
+  // through its iterations.  You may modify it if you
+  // build a divider which completes in fewer iterations.
+  // You are not responsible for the (trivial) circuit
+  // design of the block.
+
+  always @(posedge clk)
+    begin
+      if      (count == dur) done <= #1 1; // the comparison has priority over the clear below
+      else if (done | req) done <= #1 0;	
+      if (req) count <= #1 0; // restart on a new request
+      else     count <= #1 count+1;
+    end
+endmodule
+
+//////////
+// mux3 //
+//////////
+// mux3onehot: three-input selector over (N+4)-bit values.
+// sel0 takes priority over sel1; in2 is chosen when neither is set.
+// sel2 is a port but is not examined, and the selects are not checked
+// for mutual exclusion.
+module mux3onehot #(parameter N=65) (
+  input  logic [N+3:0] in0, in1, in2,
+  input  logic         sel0, sel1, sel2,
+  output logic [N+3:0] out
+);
+
+  logic [N+3:0] lowpick;   // choice between in1 and in2
+
+  assign lowpick = sel1 ? in1 : in2;
+  assign #1 out  = sel0 ? in0 : lowpick;
+endmodule
+
+
+/////////
+// csa //
+/////////
+// csa: N-bit carry-save adder with carry-in.
+// out1 is the bitwise sum of in1, in2 and in3.  out2 holds the per-bit
+// carries of the lower N-1 positions moved up by one place, with cin
+// placed in the vacated least significant bit; this is how the +1 needed
+// when adding a negated divisor is inserted.
+module csa #(parameter N=69) (
+  input  logic [N-1:0] in1, in2, in3, 
+  input  logic         cin, 
+  output logic [N-1:0] out1, out2
+);
+
+  logic [N-2:0] lo1, lo2, lo3;   // operands without their top bit
+  logic [N-2:0] carries;         // carry produced at each lower position
+
+  assign lo1 = in1[N-2:0];
+  assign lo2 = in2[N-2:0];
+  assign lo3 = in3[N-2:0];
+
+  assign carries = lo1 & (lo2 | lo3) | (lo2 & lo3);
+
+  assign #1 out1 = in1 ^ in2 ^ in3;
+  assign #1 out2 = {carries, cin};
+endmodule
+
+
+//////////////
+// expcalc  //
+//////////////
+// expcalc: result exponent, computed as XExp - YExp + BIAS in NE bits.
+module expcalc(
+  input logic  [`NE-1:0] XExp, YExp,
+  output logic [`NE-1:0] calcExp
+);
+
+  logic [`NE-1:0] expDiff;   // exponent difference before the bias is added back
+
+  assign expDiff = XExp - YExp;
+  assign calcExp = expDiff + (`NE)'(`BIAS);
+
+endmodule
+
+//////////////
+// signcalc //
+//////////////
+// signcalc: result sign, set when exactly one operand sign is set.
+module signcalc(
+  input logic  XSign, YSign,
+  output logic calcSign
+);
+
+  always_comb calcSign = (XSign != YSign);
+
+endmodule
--- a/pipelined/srt/srt_stanford.sv
+++ b/pipelined/srt/srt_stanford.sv
@ -0,0 +1,355 @@
+///////////////////////////////////////////////////////
+// srt.sv                                            //
+//                                                   //
+// Written 10/31/96 by David Harris harrisd@leland   //
+// Updated 10/19/21 David_Harris@hmc.edu             //
+//                                                   //
+// This file models a simple Radix 2 SRT divider.    //
+//                                                   //
+///////////////////////////////////////////////////////
+
+// This Verilog file models a radix 2 SRT divider which
+// produces one quotient digit per cycle.  The divider
+// keeps the partial remainder in carry-save form.
+ 
+/////////
+// srt //
+/////////
+module srt(input  logic clk, 
+           input  logic req, 
+           input  logic sqrt,  // 1 to compute sqrt(a), 0 to compute a/b
+           input  logic [51:0] a, b, 
+           output logic [54:0] rp, rm);
+ 
+  // A simple Radix 2 SRT divider/sqrt.
+  // One quotient digit (+1, 0 or -1) is selected per clock; the +1 digits
+  // are shifted into rp and the -1 digits into rm by qacc.
+  // NOTE(review): sqrt is declared but not referenced anywhere in this
+  // module body, so only the division datapath is present here.
+
+  
+  // Internal signals
+
+  logic   [55:0] ps, pc;     // partial remainder in carry-save form
+  logic   [55:0] d;          // divisor
+  logic   [55:0] psa, pca;   // partial remainder result of csa
+  logic   [55:0] psn, pcn;   // partial remainder for next cycle
+  logic   [55:0] dn;         // divisor for next cycle
+  logic   [55:0] dsel;       // selected divisor multiple
+  logic          qp, qz, qm; // quotient is +1, 0, or -1
+  logic   [55:0] d_b;        // inverse of divisor
+ 
+  // Top Muxes and Registers
+  // When req is asserted, the inputs are loaded into the divider: a and b
+  // are each extended to 56 bits with the prefix 4'b0001 and the carry word
+  // is cleared.  Otherwise, the divisor is retained and the csa result,
+  // shifted left by one bit, is fed back for the next iteration.
+  mux2 psmux({psa[54:0], 1'b0}, {4'b0001, a}, req, psn);
+  flop psflop(clk, psn, ps);
+  mux2 pcmux({pca[54:0], 1'b0}, 56'b0, req, pcn);
+  flop pcflop(clk, pcn, pc);
+  mux2 dmux(d, {4'b0001, b}, req, dn);
+  flop dflop(clk, dn, d);
+
+  // Quotient Selection logic
+  // Given the top four bits of the partial remainder, select a quotient
+  // digit of +1, 0, or -1 (qp, qz, qm)
+  // Accumulate quotient digits in a shift register
+  qsel qsel(ps[55:52], pc[55:52], qp, qz, qm);
+  qacc qacc(clk, req, qp, qz, qm, rp, rm);
+
+  // Divisor Selection logic
+  // qp picks the inverted divisor, qz picks zero, and anything else picks
+  // the divisor itself.
+  inv dinv(d, d_b);
+  mux3 divisorsel(d_b, 56'b0, d, qp, qz, qm, dsel);
+
+  // Partial Product Generation
+  // qp doubles as the csa carry-in, supplying the +1 that turns the
+  // inverted divisor into its two's complement.
+  csa csa(ps, pc, dsel, qp, psa, pca);
+endmodule
+
+//////////
+// mux2 //
+//////////
+module mux2(input  logic [55:0] in0, in1, 
+            input  logic        sel, 
+            output logic [55:0] out);
+ 
+   // 56-bit two-way mux with one unit of delay:
+   // sel low passes in0, sel high passes in1.
+   assign #1 out = ~sel ? in0 : in1;
+endmodule
+
+//////////
+// flop //
+//////////
+module flop(input  logic        clk,
+            input  logic [55:0] in,
+            output logic [55:0] out);
+
+  // 56-bit rising-edge register with no reset and no enable.
+  // The stored value updates one time unit after the clock edge, and the
+  // output assignment adds one more unit on top of that.
+  logic [55:0] q;
+
+  always @(posedge clk)
+      q <= #1 in;
+
+  assign #1 out = q;
+endmodule
+
+//////////
+// qsel //
+//////////
+// Chooses the next quotient digit from the four most significant bits of
+// the carry-save partial remainder (ps = sum word, pc = carry word).
+// Exactly one of qp (+1), qz (0), qm (-1) is asserted for known inputs.
+module qsel(input  logic [55:52] ps, pc, 
+            output logic         qp, qz, qm);
+ 
+  logic [55:52]  p, g;                  // per-bit propagate and generate
+  logic          magnitude, sign, cout;
+
+  // The quotient selection logic is presented for simplicity, not
+  // for efficiency.  You can probably optimize your logic to
+  // select the proper divisor with less delay.
+
+  // Quotient equations from EE371 lecture notes 13-20
+  assign p = ps ^ pc;
+  assign g = ps & pc;
+
+  // magnitude is 0 only when bits 54:52 all propagate (ps and pc differ in
+  // every one of those positions); that case selects a zero digit.
+  assign #1 magnitude = ~(&p[54:52]);
+  // cout is the carry out of bits 54:52 of ps + pc (no carry-in assumed),
+  // i.e. the carry into bit 55.
+  assign #1 cout = g[54] | (p[54] & (g[53] | p[53] & g[52]));
+  // sign is bit 55 of the 4-bit sum ps + pc.
+  assign #1 sign = p[55] ^ cout;
+/* Same equations written out without the p/g intermediates, kept for
+   reference:
+    assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
+			  (ps[52]^pc[52]));
+  assign #1 sign = (ps[55]^pc[55])^
+      (ps[54] & pc[54] | ((ps[54]^pc[54]) &
+			    (ps[53]&pc[53] | ((ps[53]^pc[53]) &
+						(ps[52]&pc[52]))))); */
+
+  // Produce quotient = +1, 0, or -1
+  assign #1 qp = magnitude & ~sign;
+  assign #1 qz = ~magnitude;
+  assign #1 qm = magnitude & sign;
+endmodule
+
+//////////
+// qacc //
+//////////
+// Quotient accumulator: two 55-bit shift registers that collect the
+// positive (rp) and negative (rm) quotient digits, one bit per clock.
+//   clk - clock; state changes on the rising edge
+//   req - synchronous clear; while high both registers are zeroed
+//   qp  - shifted into the LSB of rp when req is low
+//   qz  - zero-digit flag; not needed here because a zero digit shifts a 0
+//         into both registers through qp = qm = 0
+//   qm  - shifted into the LSB of rm when req is low
+module qacc(input  logic        clk,
+            input  logic        req,
+            input  logic        qp,
+            input  logic        qz,
+            input  logic        qm,
+            output logic [54:0] rp,
+            output logic [54:0] rm);
+
+  always @(posedge clk)
+    begin
+      if (req) 
+        begin
+          rp <= #1 0;
+          rm <= #1 0;
+        end
+      else 
+        begin
+          // Shift left by one, dropping the old MSB.  The explicit [53:0]
+          // slice keeps the concatenation at 55 bits, so nothing is
+          // truncated implicitly on assignment.
+          rp <= #1 {rp[53:0], qp};
+          rm <= #1 {rm[53:0], qm};
+        end
+    end
+endmodule
+
+/////////
+// inv //
+/////////
+module inv(input  logic [55:0] in, 
+           output logic [55:0] out);
+
+  // 56-bit bitwise inverter with one unit of delay
+  // (XOR against all ones flips every bit).
+  assign #1 out = in ^ {56{1'b1}};
+endmodule
+
+//////////
+// mux3 //
+//////////
+module mux3(input  logic [55:0] in0,
+            input  logic [55:0] in1,
+            input  logic [55:0] in2,
+            input  logic        sel0,
+            input  logic        sel1,
+            input  logic        sel2,
+            output logic [55:0] out);
+
+  // 56-bit three-way mux.  The selects are not checked for mutual
+  // exclusion; they are simply prioritized: sel0 first, then sel1, and in2
+  // is the default, so sel2 is never actually tested.
+  logic [55:0] lowpick;  // in1 or in2, used when sel0 is low
+
+  assign lowpick = sel1 ? in1 : in2;
+  assign #1 out  = sel0 ? in0 : lowpick;
+endmodule
+
+/////////
+// csa //
+/////////
+module csa(input  logic [55:0] in1,
+           input  logic [55:0] in2,
+           input  logic [55:0] in3,
+           input  logic        cin,
+           output logic [55:0] out1,
+           output logic [55:0] out2);
+
+  // 56-bit 3:2 carry-save compressor.  out1 holds the bitwise sum of the
+  // three inputs and out2 holds the carries.  A carry belongs one position
+  // to the left of the bits that produced it, which leaves out2[0] free;
+  // cin goes there so that an inverted divisor plus cin forms a negative
+  // divisor.  The carry out of bit 55 is dropped.
+
+  logic [54:0] maj;  // majority of the three inputs for bits 54..0
+
+  assign maj = (in1[54:0] & in2[54:0]) |
+               (in1[54:0] & in3[54:0]) |
+               (in2[54:0] & in3[54:0]);
+
+  assign #1 out1 = in1 ^ in2 ^ in3;
+  assign #1 out2 = {maj, cin};
+endmodule
+
+//////////////
+// finaladd //
+//////////////
+module finaladd(input  logic [54:0] rp,
+                input  logic [54:0] rm,
+                output logic [51:0] r);
+
+  // Converts the accumulated quotient digits into a 52-bit mantissa.
+  // rm (the -1 digits) is subtracted from rp (the +1 digits), then the
+  // result is shifted so a leading 1 sits just above the returned field:
+  // bits 52:1 are returned when diff[54] is clear, bits 53:2 otherwise.
+  //
+  // For the exercise this block is assumed to take a single cycle and is
+  // excluded from area and power budgets.
+  //
+  // No rounding is done, so the result may be one unit in the last place
+  // (ulp) too small; the checker tolerates that.
+
+  logic [54:0] diff;
+
+  assign #1 diff = rp - rm;
+  assign #1 r = ~diff[54] ? diff[52:1] : diff[53:2];
+endmodule
+
+/////////////
+// counter //
+/////////////
+// Iteration counter for the divider.  req restarts the count; done is
+// raised on the clock edge at which count is seen equal to 54.
+module counter(input  logic clk, 
+               input  logic req, 
+               output logic done);
+ 
+   logic    [5:0]  count;  // free-running; has no reset other than req
+
+  // This block of control logic sequences the divider
+  // through its iterations.  You may modify it if you
+  // build a divider which completes in fewer iterations.
+  // You are not responsible for the (trivial) circuit
+  // design of the block.
+
+  always @(posedge clk)
+    begin
+      // Setting done has priority over clearing it.  Once set, done is
+      // cleared on the following edge (done itself satisfies the else-if),
+      // so it behaves as a one-cycle pulse.
+      if      (count == 54) done <= #1 1;
+      else if (done | req) done <= #1 0;	
+      // count keeps incrementing after done; being 6 bits wide it wraps
+      // past 63 if req is not asserted again.
+      if (req) count <= #1 0;
+      else     count <= #1 count+1;
+    end
+endmodule
+
+///////////
+// clock //
+///////////
+// Placeholder clock module: it declares the clk output but contains no
+// logic that drives it.  The testbench in this file generates its own
+// clock and does not instantiate this module.
+module clock(clk);
+  output clk;
+ 
+  // Internal clk signal
+  logic clk;
+ 
+endmodule
+
+///////////////
+// testbench //
+///////////////
+// Self-checking testbench for the radix-2 SRT divider.  Vectors are read
+// from the file "testvectors"; each 156-bit entry packs a (bits 155:104),
+// b (bits 103:52) and the expected result (bits 51:0).  One vector is
+// applied per req pulse and the result is checked when done is raised.
+module testbench;
+  logic         clk;
+  logic         req;
+  logic         done;
+  logic [51:0]  a;
+  logic [51:0]  b;
+  logic [51:0]  r;
+  logic [54:0]  rp, rm;   // positive and negative quotient digits
+ 
+  // Test parameters
+  parameter MEM_SIZE = 40000;
+  parameter MEM_WIDTH = 52+52+52;
+ 
+  `define memr  51:0
+  `define memb  103:52
+  `define mema  155:104
+
+  // Test registers
+  logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
+  logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
+                            // bit field of an array 
+  logic    [51:0] correctr, nextr;
+  integer testnum, errors;
+
+  // Divider.  The ports are connected by position, so the sqrt input must
+  // be given explicitly; it is tied low to request a/b.  Leaving it out
+  // shifts a, b, rp and rm onto the wrong ports.
+  srt  srt(clk, req, 1'b0, a, b, rp, rm);
+
+  // Final adder converts quotient digits to 2's complement & normalizes
+  finaladd finaladd(rp, rm, r);
+
+  // Counter
+  counter counter(clk, req, done);
+
+
+  // Free-running clock: high for 17 time units, low for 16
+  initial
+    forever
+      begin
+        clk = 1; #17;
+        clk = 0; #16;
+      end
+
+
+  // Read test vectors from disk and launch the first one
+  initial
+    begin
+      testnum = 0; 
+      errors = 0;
+      $readmemh ("testvectors", Tests);
+      Vec = Tests[testnum];
+      a = Vec[`mema];
+      b = Vec[`memb];
+      nextr = Vec[`memr];
+      req <= #5 1;
+    end
+  
+  // Apply directed test vectors read from file.
+
+  always @(posedge clk)
+    begin
+      if (done) 
+        begin
+          req <= #5 1;
+          $display("result was %h, should be %h\n", r, correctr);
+          // The subtraction is unsigned: r may be at most 1 ulp below
+          // correctr, and any r above correctr wraps around and fails.
+          if ((correctr - r) > 1) // check if accurate to 1 ulp
+            begin
+              errors = errors+1;
+              $display("failed\n");
+              $stop;
+            end
+          // An all-x operand means the entry just loaded was never
+          // written by $readmemh, i.e. the vector file is exhausted.
+          if (a === 52'hxxxxxxxxxxxxx)
+            begin
+              $display("Tests completed successfully");
+              $stop;
+            end
+        end
+      if (req) 
+        begin
+          // The divider captures a and b on this edge; remember the
+          // matching expected result, then stage the next vector.
+          req <= #5 0;
+          correctr = nextr;
+          testnum = testnum+1;
+          Vec = Tests[testnum];
+          $display("a = %h  b = %h",a,b);
+          a = Vec[`mema];
+          b = Vec[`memb];
+          nextr = Vec[`memr];
+        end
+    end
+ 
+endmodule
+ 
--- a/pipelined/srt/stine/Makefile
+++ b/pipelined/srt/stine/Makefile
@ -0,0 +1,27 @@
+
+# Builds the srt4div and srt2div executables; each links disp.o with its
+# own object file.
+CC      = gcc
+# Compiler flags go in CFLAGS; libraries go in LIBS so that they are placed
+# after the object files on the link line.
+CFLAGS  = -g
+LIBS    = -lm
+OBJS4   = disp.o srt4div.o
+OBJS2   = disp.o srt2div.o
+
+# These targets do not name files and must always run.
+.PHONY:		all clean
+
+all:		srt4div srt2div
+
+disp.o:		disp.h disp.c
+		$(CC) $(CFLAGS) -c -o disp.o disp.c 
+
+srt4div.o:	srt4div.c
+		$(CC) $(CFLAGS) -c -o srt4div.o srt4div.c
+
+srt2div.o:	srt2div.c
+		$(CC) $(CFLAGS) -c -o srt2div.o srt2div.c
+
+srt4div:  	$(OBJS4)
+		$(CC) $(CFLAGS) -O3 -o srt4div $(OBJS4) $(LIBS)
+
+srt2div:  	$(OBJS2)
+		$(CC) $(CFLAGS) -O3 -o srt2div $(OBJS2) $(LIBS)
+
+clean:
+	rm -f *.o *~
+	rm -f core
--- a/pipelined/srt/stine/README
+++ b/pipelined/srt/stine/README
@ -0,0 +1 @@
+vsim -do iter64.do -c
--- a/pipelined/srt/stine/README.md
+++ b/pipelined/srt/stine/README.md
@ -0,0 +1,22 @@
+This is a novel integer divider using r4 division by recurrence.  The
+reference is:
+
+J. E. Stine and K. Hill, "An Efficient Implementation of Radix-4
+Integer Division Using Scaling," 2020 IEEE 63rd International Midwest
+Symposium on Circuits and Systems (MWSCAS), Springfield, MA, USA,
+2020, pp. 1092-1095, doi: 10.1109/MWSCAS48704.2020.9184631.
+
+Although this version does not contain scaling, it could be added if
+needed.  Moreover, a higher radix or an overlapped radix can easily be
+used to expand the size.  Also, the implementations here are
+initially unsigned, but we hope to extend them to signed operation,
+which should be easy.
+
+There are two types of tests in this directory within each testbench.
+One tests for 32-bits and the other 64-bits:
+
+int32div.do and int64div.do = test individual vector for debugging
+
+iter32.do and iter64.do = do not use any waveform generation and just
+output lots of tests
+
--- a/pipelined/srt/stine/checkme.sh
+++ b/pipelined/srt/stine/checkme.sh
@ -0,0 +1,19 @@
+#!/bin/sh
+cat iter64_signed.out | grep "0 1$"
+cat iter64_signed.out | grep "1 0$"
+cat iter64_signed.out | grep "0 0$"
+cat iter64_unsigned.out | grep "0 1$"
+cat iter64_unsigned.out | grep "1 0$"
+cat iter64_unsigned.out | grep "0 0$"
+cat iter32_signed.out | grep "0 1$"
+cat iter32_signed.out | grep "1 0$"
+cat iter32_signed.out | grep "0 0$"
+cat iter32_unsigned.out | grep "0 1$"
+cat iter32_unsigned.out | grep "1 0$"
+cat iter32_unsigned.out | grep "0 0$"
+cat iter128_signed.out | grep "0 1$"
+cat iter128_signed.out | grep "1 0$"
+cat iter128_signed.out | grep "0 0$"
+cat iter128_unsigned.out | grep "0 1$"
+cat iter128_unsigned.out | grep "1 0$"
+cat iter128_unsigned.out | grep "0 0$"
--- a/pipelined/srt/stine/disp.c
+++ b/pipelined/srt/stine/disp.c
@ -0,0 +1,60 @@
+#include "disp.h"
+
+/* Round x toward zero at `bits` fractional binary digits: negative values
+ * are rounded up with ceiling(), all other values down with flr(). */
+double rnd_zero(double x, double bits) {
+  return (x < 0) ? ceiling(x, bits) : flr(x, bits);
+}
+
+/* Round x to `precision` fractional binary digits using rint(), which
+ * follows the current floating-point rounding mode. */
+double rne(double x, double precision) {
+  const double scale = pow(2.0, precision);
+  return rint(x * scale) / scale;
+}
+
+/* Round x down (toward negative infinity) to `precision` fractional
+ * binary digits. */
+double flr(double x, double precision) {
+  const double scale = pow(2.0, precision);
+  return floor(x * scale) / scale;
+}
+
+/* Round x up (toward positive infinity) to `precision` fractional binary
+ * digits. */
+double ceiling(double x, double precision) {
+  const double scale = pow(2.0, precision);
+  return ceil(x * scale) / scale;
+}
+
+/*
+ * Write x to out_file as a fixed-point binary string.
+ *
+ *   x             value to print
+ *   bits_to_left  number of digits printed before the binary point
+ *   bits_to_right number of digits printed after the binary point
+ *   out_file      destination stream
+ *
+ * Negative values are printed in two's complement by adding
+ * 2^bits_to_left first.  A value whose magnitude is below one unit in the
+ * last printed place is shown as all zeros.  The binary point is written
+ * after the units digit in every case, including the all-zero case, so
+ * every value printed with the same widths has the same layout.
+ */
+void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file) {
+
+  double diff;
+  int i;
+  if (fabs(x) <  pow(2.0, -bits_to_right)) {
+    // Too small to show: print as exact zero.  Going through the common
+    // loop below keeps the "." in the output; it also stops a tiny
+    // negative value from being wrapped into all ones.
+    x = 0.0;
+  }
+  else if (x < 0.0) {
+    x = pow(2.0, ((double) bits_to_left)) + x;
+  }
+  // Peel off one bit per position, most significant first; position i has
+  // weight 2^-i.
+  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
+    diff = pow(2.0, -i);
+    if (x < diff) {
+      fprintf(out_file, "0");
+    }
+    else {
+      fprintf(out_file, "1");
+      x -= diff;
+    }
+    if (i == 0) {
+      fprintf(out_file, ".");
+    }
+  }
+}
+
--- a/pipelined/srt/stine/disp.h
+++ b/pipelined/srt/stine/disp.h
@ -0,0 +1,18 @@
+/* Rounding helpers and a fixed-point binary printer, defined in disp.c. */
+#include <stdlib.h>
+#include <math.h>
+#include <stdio.h>
+
+#ifndef DISP
+#define DISP
+
+/* Round x toward zero at `bits` fractional binary digits. */
+double rnd_zero(double x, double bits);
+
+/* Round x to `precision` fractional binary digits using rint(). */
+double rne(double x, double precision);
+
+/* Round x down to `precision` fractional binary digits. */
+double flr(double x, double precision);
+
+/* Round x up to `precision` fractional binary digits. */
+double ceiling(double x, double precision);
+
+/* Print x to out_file in binary with bits_to_left digits before the
+ * binary point and bits_to_right digits after it; negative values are
+ * shown in two's complement. */
+void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file);
+
+#endif 
--- a/pipelined/srt/stine/idiv-config.vh
+++ b/pipelined/srt/stine/idiv-config.vh
@ -0,0 +1,27 @@
+//////////////////////////////////////////
+// idiv-config.vh
+//
+// Written: james.stine@okstate.edu 9 June 2022
+// Modified: 
+//
+// Purpose: Specify which features are configured
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+// Integer division tests
+`define IDIV_TESTS 1048576
--- a/pipelined/srt/stine/intdiv.sv
+++ b/pipelined/srt/stine/intdiv.sv
--- a/pipelined/srt/stine/iter128.do
+++ b/pipelined/srt/stine/iter128.do
@ -0,0 +1,50 @@
+# Copyright 1991-2007 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this iter128.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do iter128.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do iter128.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 999586700ns
+quit
--- a/pipelined/srt/stine/iter128S.do
+++ b/pipelined/srt/stine/iter128S.do
@ -0,0 +1,50 @@
+# Copyright 1991-2007 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this iter128S.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do iter128S.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do iter128S.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128S.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 999586700ns
+quit
--- a/pipelined/srt/stine/iter32.do
+++ b/pipelined/srt/stine/iter32.do
@ -0,0 +1,50 @@
+# Copyright 1991-2007 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this iter32.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do iter32.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do iter32.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 999586700ns
+quit
--- a/pipelined/srt/stine/iter32S.do
+++ b/pipelined/srt/stine/iter32S.do
@ -0,0 +1,50 @@
+# Copyright 1991-2007 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this iter32S.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do iter32S.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do iter32S.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32S.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 999586700ns
+quit
--- a/pipelined/srt/stine/iter64.do
+++ b/pipelined/srt/stine/iter64.do
@ -0,0 +1,50 @@
+# Copyright 1991-2007 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this iter64.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do iter64.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do iter64.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 999586700ns
+quit
--- a/pipelined/srt/stine/iter64S.do
+++ b/pipelined/srt/stine/iter64S.do
@ -0,0 +1,50 @@
+# Copyright 1991-2007 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this iter64S.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do iter64S.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do iter64S.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64S.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation
+run 999586700ns
+quit
--- a/pipelined/srt/stine/lod.sv
+++ b/pipelined/srt/stine/lod.sv
@ -0,0 +1,182 @@
+///////////////////////////////////////////
+// lod.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified: 
+//
+// Purpose: Integer Divide instructions
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+// Two-bit base cell of the leading-one detector tree.
+//   V - high when either bit of B is set
+//   P - high when the only set bit is B[0], i.e. one zero sits above the
+//       leading one
+module lod2 (
+   output logic       P,
+   output logic       V,
+   input  logic [1:0] B
+);
+
+   assign V = |B;
+   assign P = ~B[1] & B[0];
+   
+endmodule // lod2
+
+// Width-selecting wrapper around the fixed-size leading-one detectors.
+//   B  - input word
+//   ZP - position output of the selected detector
+//   ZV - valid output of the selected detector
+// Only WIDTH values of 4, 8, 16, 32, 64 and 128 instantiate anything; for
+// any other WIDTH no branch matches and ZP/ZV are left undriven.
+module lod_hier #(parameter WIDTH=8) 
+   (input logic [WIDTH-1:0]          B,
+    output logic [$clog2(WIDTH)-1:0] ZP,
+    output logic 		     ZV);
+
+   // Elaboration-time selection on the WIDTH parameter
+   if (WIDTH == 128)
+     lod128 lod128 (ZP, ZV, B);	      
+   else if (WIDTH == 64)
+     lod64 lod64 (ZP, ZV, B);	   
+   else if (WIDTH == 32)
+     lod32 lod32 (ZP, ZV, B);
+   else if (WIDTH == 16)
+     lod16 lod16 (ZP, ZV, B);
+   else if (WIDTH == 8)
+     lod8 lod8 (ZP, ZV, B);
+   else if (WIDTH == 4)
+     lod4 lod4 (ZP, ZV, B);
+
+endmodule // lod_hier
+
+// 4-bit leading-one detector built from two lod2 cells.
+//   ZV - high when any bit of B is set
+//   ZP - when ZV is high, the number of zero bits above the leading one
+module lod4 (
+   output logic [1:0] ZP,
+   output logic       ZV,
+   input  logic [3:0] B
+);
+
+   logic posLo, posHi;   // position outputs of the lower / upper pair
+   logic anyLo, anyHi;   // valid outputs of the lower / upper pair
+
+   lod2 l1(posLo, anyLo, B[1:0]);
+   lod2 l2(posHi, anyHi, B[3:2]);
+
+   // The top bit records that the upper pair is empty; the low bit comes
+   // from whichever pair holds the leading one.
+   assign ZP = {~anyHi, anyHi ? posHi : posLo};
+   assign ZV = anyHi | anyLo;
+
+endmodule // lod4
+
+// 8-bit leading-one detector built from two lod4 halves.
+//   ZV - high when any bit of B is set
+//   ZP - when ZV is high, the number of zero bits above the leading one
+module lod8 (
+   output logic [2:0] ZP,
+   output logic       ZV,
+   input  logic [7:0] B
+);
+
+   logic [1:0] posLo, posHi;   // position outputs of the lower / upper half
+   logic       anyLo, anyHi;   // valid outputs of the lower / upper half
+
+   lod4 l1(posLo, anyLo, B[3:0]);
+   lod4 l2(posHi, anyHi, B[7:4]);
+
+   // The top bit records that the upper half is empty; the remaining bits
+   // come from whichever half holds the leading one.
+   assign ZP = {~anyHi, anyHi ? posHi : posLo};
+   assign ZV = anyHi | anyLo;
+
+endmodule // lod8
+
+// 16-bit leading-one detector built from two lod8 halves.
+//   ZV - high when any bit of B is set
+//   ZP - when ZV is high, the number of zero bits above the leading one
+module lod16 (
+   output logic [3:0]  ZP,
+   output logic        ZV,
+   input  logic [15:0] B
+);
+
+   logic [2:0] posLo, posHi;   // position outputs of the lower / upper half
+   logic       anyLo, anyHi;   // valid outputs of the lower / upper half
+
+   lod8 l1(posLo, anyLo, B[7:0]);
+   lod8 l2(posHi, anyHi, B[15:8]);
+
+   // The top bit records that the upper half is empty; the remaining bits
+   // come from whichever half holds the leading one.
+   assign ZP = {~anyHi, anyHi ? posHi : posLo};
+   assign ZV = anyHi | anyLo;
+
+endmodule // lod16
+
+// 32-bit leading-one detector built from two lod16 halves.
+//   ZV - high when any bit of B is set
+//   ZP - when ZV is high, the number of zero bits above the leading one
+module lod32 (
+   output logic [4:0]  ZP,
+   output logic        ZV,
+   input  logic [31:0] B
+);
+
+   logic [3:0] posLo, posHi;   // position outputs of the lower / upper half
+   logic       anyLo, anyHi;   // valid outputs of the lower / upper half
+
+   lod16 l1(posLo, anyLo, B[15:0]);
+   lod16 l2(posHi, anyHi, B[31:16]);
+
+   // The top bit records that the upper half is empty; the remaining bits
+   // come from whichever half holds the leading one.
+   assign ZP = {~anyHi, anyHi ? posHi : posLo};
+   assign ZV = anyHi | anyLo;
+
+endmodule // lod32
+
+// 64-bit leading-one detector built from two lod32 halves.
+//   ZV - high when any bit of B is set
+//   ZP - when ZV is high, the number of zero bits above the leading one
+module lod64 (
+   output logic [5:0]  ZP,
+   output logic        ZV,
+   input  logic [63:0] B
+);
+
+   logic [4:0] posLo, posHi;   // position outputs of the lower / upper half
+   logic       anyLo, anyHi;   // valid outputs of the lower / upper half
+
+   lod32 l1(posLo, anyLo, B[31:0]);
+   lod32 l2(posHi, anyHi, B[63:32]);
+
+   // The top bit records that the upper half is empty; the remaining bits
+   // come from whichever half holds the leading one.
+   assign ZP = {~anyHi, anyHi ? posHi : posLo};
+   assign ZV = anyHi | anyLo;
+
+endmodule // lod64
+
+// 128-bit leading-one detector built from two lod64 halves.
+//   ZV - high when any bit of B is set
+//   ZP - when ZV is high, the number of zero bits above the leading one
+module lod128 (
+   output logic [6:0]   ZP,
+   output logic         ZV,
+   input  logic [127:0] B
+);
+
+   logic [5:0] posLo, posHi;   // position outputs of the lower / upper half
+   logic       anyLo, anyHi;   // valid outputs of the lower / upper half
+
+   lod64 l1(posLo, anyLo, B[63:0]);
+   lod64 l2(posHi, anyHi, B[127:64]);
+
+   // The top bit records that the upper half is empty; the remaining bits
+   // come from whichever half holds the leading one.
+   assign ZP = {~anyHi, anyHi ? posHi : posLo};
+   assign ZV = anyHi | anyLo;
+
+endmodule // lod128
--- a/pipelined/srt/stine/lzd.do
+++ b/pipelined/srt/stine/lzd.do
@ -0,0 +1,55 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this lzd.do file to run this example.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do lzd.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do lzd.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog lod.sv lzd_tb.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.stimulus
+
+view wave
+
+-- display input and output signals as hexadecimal values
+# Displays all signals recursively
+add wave -hex -r /stimulus/*
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 350
+configure wave -valuecolwidth 200
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation 
+run 800ns
+quit
--- a/pipelined/srt/stine/lzd.sv
+++ b/pipelined/srt/stine/lzd.sv
@ -0,0 +1,182 @@
+///////////////////////////////////////////
+// lzd.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified: 
+//
+// Purpose: Integer Divide instructions
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+// 2-bit base cell of the lzd tree.
+//   B : 2-bit input
+//   V : 1 unless both bits of B are 1 (i.e. B contains at least one 0)
+//   P : copy of B[1]; 0 when the upper bit is 0, 1 otherwise
+module lzd2 (
+   output logic       P,
+   output logic       V,
+   input  logic [1:0] B);
+
+   assign P = B[1];
+   assign V = ~&B;   // reduction NAND over both bits
+
+endmodule // lzd2
+
+// Width-parameterized wrapper that instantiates the fixed-width lzd block
+// matching WIDTH.
+//   B  : WIDTH-bit input vector
+//   ZP : $clog2(WIDTH)-bit position output of the selected block
+//   ZV : valid output of the selected block
+// Only WIDTH = 4, 8, 16, 32, 64 and 128 instantiate anything; for any other
+// value no block is generated and ZP/ZV are left undriven.
+module lzd_hier #(parameter WIDTH=8) 
+   (input  logic [WIDTH-1:0]         B,
+    output logic [$clog2(WIDTH)-1:0] ZP,
+    output logic                     ZV);
+
+   // Elaboration-time selection; instance names are unchanged
+   case (WIDTH)
+     128: lzd128 lzd127 (ZP, ZV, B);
+     64:  lzd64  lzd64  (ZP, ZV, B);
+     32:  lzd32  lzd32  (ZP, ZV, B);
+     16:  lzd16  lzd16  (ZP, ZV, B);
+     8:   lzd8   lzd8   (ZP, ZV, B);
+     4:   lzd4   lzd4   (ZP, ZV, B);
+   endcase
+
+endmodule // lzd_hier
+
+// 4-bit stage of the lzd tree: combines two lzd2 cells.
+//   B  : 4-bit input
+//   ZP : ZP[1] is the inverse of the upper pair's valid flag; ZP[0] comes
+//        from the upper pair when it is valid, otherwise from the lower pair
+//   ZV : set when either pair reports valid
+module lzd4 (
+   output logic [1:0] ZP,
+   output logic       ZV,
+   input  logic [3:0] B);
+
+   // Results of the lower (B[1:0]) and upper (B[3:2]) pairs
+   logic lo_pos, hi_pos;
+   logic lo_valid, hi_valid;
+
+   lzd2 l1 (lo_pos, lo_valid, B[1:0]);
+   lzd2 l2 (hi_pos, hi_valid, B[3:2]);
+
+   // The upper pair has priority whenever it reports a valid result
+   assign ZV = lo_valid | hi_valid;
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+
+endmodule // lzd4
+
+// 8-bit stage of the lzd tree: combines two lzd4 blocks.
+//   B  : 8-bit input
+//   ZP : ZP[2] is the inverse of the upper nibble's valid flag; ZP[1:0] comes
+//        from the upper nibble when it is valid, otherwise from the lower one
+//   ZV : set when either nibble reports valid
+module lzd8 (
+   output logic [2:0] ZP,
+   output logic       ZV,
+   input  logic [7:0] B);
+
+   // Results of the lower (B[3:0]) and upper (B[7:4]) nibbles
+   logic [1:0] lo_pos, hi_pos;
+   logic       lo_valid, hi_valid;
+
+   lzd4 l1 (lo_pos, lo_valid, B[3:0]);
+   lzd4 l2 (hi_pos, hi_valid, B[7:4]);
+
+   // The upper nibble has priority whenever it reports a valid result
+   assign ZV = lo_valid | hi_valid;
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+
+endmodule // lzd8
+
+// 16-bit stage of the lzd tree: combines two lzd8 blocks.
+//   B  : 16-bit input
+//   ZP : ZP[3] is the inverse of the upper byte's valid flag; ZP[2:0] comes
+//        from the upper byte when it is valid, otherwise from the lower byte
+//   ZV : set when either byte reports valid
+module lzd16 (
+   output logic [3:0]  ZP,
+   output logic        ZV,
+   input  logic [15:0] B);
+
+   // Results of the lower (B[7:0]) and upper (B[15:8]) bytes
+   logic [2:0] lo_pos, hi_pos;
+   logic       lo_valid, hi_valid;
+
+   lzd8 l1 (lo_pos, lo_valid, B[7:0]);
+   lzd8 l2 (hi_pos, hi_valid, B[15:8]);
+
+   // The upper byte has priority whenever it reports a valid result
+   assign ZV = lo_valid | hi_valid;
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+
+endmodule // lzd16
+
+// 32-bit stage of the lzd tree: combines two lzd16 blocks.
+//   B  : 32-bit input
+//   ZP : ZP[4] is the inverse of the upper half's valid flag; ZP[3:0] comes
+//        from the upper half when it is valid, otherwise from the lower half
+//   ZV : set when either half reports valid
+module lzd32 (
+   output logic [4:0]  ZP,
+   output logic        ZV,
+   input  logic [31:0] B);
+
+   // Results of the lower (B[15:0]) and upper (B[31:16]) halves
+   logic [3:0] lo_pos, hi_pos;
+   logic       lo_valid, hi_valid;
+
+   lzd16 l1 (lo_pos, lo_valid, B[15:0]);
+   lzd16 l2 (hi_pos, hi_valid, B[31:16]);
+
+   // The upper half has priority whenever it reports a valid result
+   assign ZV = lo_valid | hi_valid;
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+
+endmodule // lzd32
+
+// 64-bit stage of the lzd tree: combines two lzd32 blocks.
+//   B  : 64-bit input
+//   ZP : ZP[5] is the inverse of the upper half's valid flag; ZP[4:0] comes
+//        from the upper half when it is valid, otherwise from the lower half
+//   ZV : set when either half reports valid
+module lzd64 (
+   output logic [5:0]  ZP,
+   output logic        ZV,
+   input  logic [63:0] B);
+
+   // Results of the lower (B[31:0]) and upper (B[63:32]) halves
+   logic [4:0] lo_pos, hi_pos;
+   logic       lo_valid, hi_valid;
+
+   lzd32 l1 (lo_pos, lo_valid, B[31:0]);
+   lzd32 l2 (hi_pos, hi_valid, B[63:32]);
+
+   // The upper half has priority whenever it reports a valid result
+   assign ZV = lo_valid | hi_valid;
+   assign ZP = {~hi_valid, (hi_valid ? hi_pos : lo_pos)};
+
+endmodule // lzd64
+
+// 128-bit stage of the lzd tree: combines two lzd64 blocks.
+//   B  : 128-bit input
+//   ZP : ZP[6] is the inverse of the upper half's valid flag; ZP[5:0] comes
+//        from the upper half when it is valid, otherwise from the lower half
+//   ZV : set when either half reports valid
+module lzd128 (ZP, ZV, B);
+
+   input logic [127:0]  B;
+   
+   // Position / valid results of the lower and upper 64-bit halves
+   logic [5:0]         ZPa;
+   logic [5:0]         ZPb;
+   logic               ZVa;
+   logic               ZVb;
+   
+   output logic [6:0]  ZP;
+   output logic        ZV;
+   
+   // Each lzd64 takes exactly 64 bits: the halves must be the disjoint
+   // slices [63:0] and [127:64] (not the overlapping 65-bit [64:0]/[127:63]).
+   lzd64 l1 (ZPa, ZVa, B[63:0]);
+   lzd64 l2 (ZPb, ZVb, B[127:64]);
+   
+   // The upper half has priority whenever it reports a valid result
+   assign ZP[5:0] = ZVb ? ZPb : ZPa;
+   assign ZP[6]   = ~ZVb;
+   assign ZV = ZVa | ZVb;
+
+endmodule // lzd128
--- a/pipelined/srt/stine/lzd_tb.sv
+++ b/pipelined/srt/stine/lzd_tb.sv
@ -0,0 +1,59 @@
+//
+// File name : tb
+// Title     : test
+// project   : HW3
+// Library   : test
+// Purpose   : definition of modules for testbench 
+// notes :   
+//
+// Copyright Oklahoma State University
+//
+
+// Top level stimulus module
+
+`timescale 1ns/1ps
+// Testbench for the 8-bit lzd_hier: applies 256 random input vectors and
+// logs the input together with the DUT outputs to "lzd.out".
+module stimulus;
+
+   logic [7:0] B;      // DUT input vector
+   logic [2:0] ZP;     // DUT position output
+   logic       ZV;     // DUT valid output
+
+   logic       clk;    // 10-time-unit clock used only to pace the stimulus
+   
+   integer     handle3;   // value returned by $fopen
+   integer     desc3;     // copy of handle3 used by $fdisplay
+   integer     i;         // vector counter
+   
+   // instantiate part to test
+   lzd_hier #(8) dut (B, ZP, ZV);
+
+   // Free-running clock: starts high and toggles every 5 time units
+   initial 
+     begin	
+	clk = 1'b1;
+	forever #5 clk = ~clk;
+     end
+   
+   // Open the log file once at time 0
+   initial
+     begin
+	handle3 = $fopen("lzd.out");
+	desc3 = handle3;	
+     end
+   
+   // Drive a new random vector on each rising edge and log the result on
+   // the following falling edge, then end the simulation.
+   initial
+     begin
+	for (i=0; i < 256; i=i+1)
+	  begin
+	     // Put vectors before beginning of clk
+	     @(posedge clk)
+	       begin
+		  B = $random;
+	       end
+	     @(negedge clk)
+	       begin
+		  // Log format: input || position valid
+		  $fdisplay(desc3, "%b || %b %b", B, ZP, ZV);
+	       end
+	  end // for (i=0; i < 256; i=i+1)
+	$finish;// 	
+     end // initial begin   
+   
+endmodule // stimulus
--- a/pipelined/srt/stine/mux.sv
+++ b/pipelined/srt/stine/mux.sv
@ -0,0 +1,51 @@
+// 2:1 multiplexer, WIDTH bits wide.
+//   d0, d1 : data inputs
+//   s      : select; y = d1 when s is 1, d0 when s is 0
+module mux2 #(parameter WIDTH = 8)
+   (input  logic [WIDTH-1:0] d0,
+    input  logic [WIDTH-1:0] d1,
+    input  logic             s,
+    output logic [WIDTH-1:0] y);
+
+   assign y = s ? d1 : d0;
+
+endmodule // mux2
+
+// 3:1 multiplexer, WIDTH bits wide.
+//   d0, d1, d2 : data inputs
+//   s          : 2-bit select; s[1] set picks d2 (regardless of s[0]),
+//                otherwise s[0] picks between d1 (1) and d0 (0)
+module mux3 #(parameter WIDTH = 8)
+   (input  logic [WIDTH-1:0] d0,
+    input  logic [WIDTH-1:0] d1,
+    input  logic [WIDTH-1:0] d2,
+    input  logic [1:0]       s,
+    output logic [WIDTH-1:0] y);
+
+   // First level: choose between d0 and d1 with the low select bit
+   logic [WIDTH-1:0] low_pair;
+
+   assign low_pair = s[0] ? d1 : d0;
+   assign y        = s[1] ? d2 : low_pair;
+
+endmodule // mux3
+
+// 4:1 multiplexer, WIDTH bits wide.
+//   d0..d3 : data inputs
+//   s      : 2-bit select; y = d<s> (00 -> d0, 01 -> d1, 10 -> d2, 11 -> d3)
+module mux4 #(parameter WIDTH = 8)
+   (input  logic [WIDTH-1:0] d0,
+    input  logic [WIDTH-1:0] d1,
+    input  logic [WIDTH-1:0] d2,
+    input  logic [WIDTH-1:0] d3,
+    input  logic [1:0]       s,
+    output logic [WIDTH-1:0] y);
+
+   // First level: s[0] picks within each pair; second level: s[1] picks the pair
+   logic [WIDTH-1:0] low_pair;
+   logic [WIDTH-1:0] high_pair;
+
+   assign low_pair  = s[0] ? d1 : d0;
+   assign high_pair = s[0] ? d3 : d2;
+   assign y         = s[1] ? high_pair : low_pair;
+
+endmodule // mux4
+
+// Fixed-width 32-bit 2:1 multiplexer.
+//   Z   : output; B when Sel is 1, A when Sel is 0
+//   A,B : 32-bit data inputs
+//   Sel : select
+module mux21x32 (
+   output logic [31:0] Z,
+   input  logic [31:0] A,
+   input  logic [31:0] B,
+   input  logic        Sel);
+
+   assign Z = Sel ? B : A;
+
+endmodule // mux21x32
+
+// Fixed-width 64-bit 2:1 multiplexer.
+//   Z   : output; B when Sel is 1, A when Sel is 0
+//   A,B : 64-bit data inputs
+//   Sel : select
+module mux21x64 (
+   output logic [63:0] Z,
+   input  logic [63:0] A,
+   input  logic [63:0] B,
+   input  logic        Sel);
+
+   assign Z = Sel ? B : A;
+
+endmodule // mux21x64
+
--- a/pipelined/srt/stine/notes
+++ b/pipelined/srt/stine/notes
@ -0,0 +1,30 @@
+Dividend x = (0.10101111)_2, divisor d = (0.11000101)_2 (i = 16 * (0.1100)_2 = 12)
+
+X = 175 (xAF)
+D = 197 (xC5)
+
+X = 175/256 = 0.68359375
+D = 197/256 = 0.76953125
+
+Note: Add lg(r) extra iterations due to shifting of computed q
+      q_{computed} = q / radix
+
+./srt4div 0.68359375 0.76953125 8 10
+
+r=2
+X = 0.10011111
+D = 0.11000101
+
+X = 159 (9F)
+D = 197 (C5)
+
+X = 159/256 = 0.62109375
+D = 197/256 = 0.76953125
+
+./srt2div 0.62109375 0.76953125 8 9
+
+
+
+
+
+
--- a/pipelined/srt/stine/otf4.in
+++ b/pipelined/srt/stine/otf4.in
@ -0,0 +1,23 @@
+.i 4
+.o 6
+.ilb quot[3] quot[2] quot[1] quot[0]
+.ob Qin[1] Qin[0] QMin[1] QMin[0] CshiftQ CshiftQM
+
+0000 001100
+0001 100110
+0010 111010
+0011 ------
+0100 010001
+0101 ------
+0110 ------
+0111 ------
+1000 100101
+1001 ------
+1010 ------
+1011 ------
+1100 ------
+1101 ------
+1110 ------
+1111 ------
+
+.e
--- a/pipelined/srt/stine/pd_bad.png
+++ b/pipelined/srt/stine/pd_bad.png
--- a/pipelined/srt/stine/pd_cpa.png
+++ b/pipelined/srt/stine/pd_cpa.png
--- a/pipelined/srt/stine/pd_csa.pdf
+++ b/pipelined/srt/stine/pd_csa.pdf
--- a/pipelined/srt/stine/pd_csa.png
+++ b/pipelined/srt/stine/pd_csa.png
--- a/pipelined/srt/stine/qslc_r4a2
+++ b/pipelined/srt/stine/qslc_r4a2
--- a/pipelined/srt/stine/qslc_r4a2.c
+++ b/pipelined/srt/stine/qslc_r4a2.c
@ -0,0 +1,198 @@
+/*
+  Program:      qslc_r4a2.c
+  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
+  User:         James E. Stine
+
+*/
+
+#include <stdio.h>
+#include <math.h>
+
+/* Field widths, in bits, of the quotient-selection table inputs. */
+#define DIVISOR_SIZE 3  /* divisor bits used to index the table */
+#define CARRY_SIZE 7    /* NOTE(review): not referenced in this file's visible code */
+#define SUM_SIZE 7      /* NOTE(review): not referenced in this file's visible code */
+#define TOT_SIZE 7      /* signed partial-remainder estimate */
+
+void disp_binary(double x, int bits_to_left, int bits_to_right);
+
+/*
+  One table input: an unsigned divisor index and a signed remainder estimate.
+  `tot` is declared `signed int` explicitly because the signedness of a plain
+  `int` bit-field is implementation-defined, and the table needs negative
+  values.
+*/
+struct bits {
+  unsigned int divisor : DIVISOR_SIZE;
+  signed int tot : TOT_SIZE;
+} pla;
+
+/* 
+
+   Function:      disp_binary
+   Description:   Prints x to stdout as a string of binary digits, most
+   significant first, with no radix point and no newline. The digits have
+   weights 2^(bits_to_left-1) down to 2^(-bits_to_right). A negative x is
+   first wrapped by adding 2^bits_to_left (two's-complement encoding).
+   A value whose magnitude is below 2^(-bits_to_right) prints as all zeros.
+   Argument List: double x            The value to be converted
+   int bits_to_left    Number of bits left of radix point
+   int bits_to_right   Number of bits right of radix point
+   Return value:  none
+
+*/
+void disp_binary(double x, int bits_to_left, int bits_to_right) {
+  int i;
+
+  /* Too small to set even the least significant digit: print zeros only. */
+  if (fabs(x) < ldexp(1.0, -bits_to_right)) {
+    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
+      putchar('0');
+    }
+    return;
+  }
+
+  /* Map negative values onto their two's-complement bit pattern. */
+  if (x < 0.0) {
+    x += ldexp(1.0, bits_to_left);
+  }
+
+  /* Peel off one digit per iteration, from weight 2^(bits_to_left-1) down. */
+  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
+    double weight = ldexp(1.0, -i);
+
+    if (x < weight) {
+      putchar('0');
+    } else {
+      putchar('1');
+      x -= weight;
+    }
+  }
+}
+
+/*
+  Prints a Verilog case statement that maps every {divisor, remainder}
+  combination to a one-hot-style radix-4 quotient digit:
+
+	4 bits for Radix 4 (a=2)
+	1000 = +2
+	0100 = +1
+	0000 =  0
+	0010 = -1
+	0001 = -2
+
+  For each divisor index the remainder estimates are emitted in the order
+  0 .. 2^(TOT_SIZE-1)-1 followed by -2^(TOT_SIZE-1) .. -1, i.e. in increasing
+  order of their two's-complement bit pattern.
+*/
+int main() {
+  enum { NUM_DIVISORS = 1 << DIVISOR_SIZE, NUM_TOTALS = 1 << TOT_SIZE };
+
+  /* Per divisor index: smallest remainder estimate that still selects
+     +2, +1, 0 and -1 respectively; anything below the last entry selects -2. */
+  static const int lower_bound[][4] = {
+    { 12, 4, -4, -13 },
+    { 14, 4, -6, -15 },
+    { 15, 4, -6, -16 },
+    { 16, 4, -6, -18 },
+    { 18, 6, -8, -20 },
+    { 20, 6, -8, -20 },
+    { 20, 8, -8, -22 },
+    { 24, 8, -8, -24 },
+  };
+  static const char *const digit_code[5] = {
+    "1000", "0100", "0000", "0010", "0001"
+  };
+  const int num_rows = (int) (sizeof lower_bound / sizeof lower_bound[0]);
+  int divisor;
+  int m;
+
+  printf("\tcase({D[5:3],Wmsbs})\n");
+  for (divisor = 0; divisor < NUM_DIVISORS; divisor++) {
+    for (m = 0; m < NUM_TOTALS; m++) {
+      /* Interpret the TOT_SIZE-bit pattern m as a signed value, without
+         relying on signed bit-field wrap-around. */
+      int tot = (m < NUM_TOTALS / 2) ? m : m - NUM_TOTALS;
+
+      printf("\t\t10'b");
+      disp_binary((double) divisor, DIVISOR_SIZE, 0);
+      printf("_");
+      disp_binary((double) tot, TOT_SIZE, 0);
+      printf(": q = 4'b");
+
+      if (divisor < num_rows) {
+        int k = 0;
+
+        /* First threshold that tot reaches decides the digit. */
+        while (k < 4 && tot < lower_bound[divisor][k]) {
+          k++;
+        }
+        fputs(digit_code[k], stdout);
+      } else {
+        /* No table row for this divisor index: emit a 4-bit don't-care. */
+        fputs("XXXX", stdout);
+      }
+
+      printf(";\n");
+    }
+  }
+  printf("\tendcase\n");
+
+  return 0;
+}
--- a/pipelined/srt/stine/run.sh
+++ b/pipelined/srt/stine/run.sh
@ -0,0 +1,8 @@
+#!/bin/sh
+# Run each iterative-divider ModelSim script in batch (-c) mode, in order.
+for script in iter32S iter32 iter64 iter64S iter128 iter128S; do
+    vsim -do "${script}.do" -c
+done
+
--- a/pipelined/srt/stine/shift.sv
+++ b/pipelined/srt/stine/shift.sv
@ -0,0 +1,73 @@
+///////////////////////////////////////////
+// shifters.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified: 
+//
+// Purpose: Integer Divide instructions
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+// Logical right shifter built from $clog2(WIDTH) stages of 2:1 muxes.
+//   A     : value to shift
+//   Shift : shift amount
+//   Z     : A shifted right by Shift, zero-filled from the top
+// Stage i shifts by WIDTH/2^(i+1) when Shift[$clog2(WIDTH)-1-i] is set, so
+// the stage distances are WIDTH/2, WIDTH/4, ..., 1. These match the binary
+// weights of Shift only when WIDTH is a power of two.
+module shift_right #(parameter WIDTH=8) 
+   (input  logic [WIDTH-1:0]         A,
+    input  logic [$clog2(WIDTH)-1:0] Shift,
+    output logic [WIDTH-1:0]         Z);
+   
+   localparam LEVELS = $clog2(WIDTH);
+
+   // stage[0] is the input; stage[i+1] is the output of mux level i
+   logic [WIDTH-1:0] stage [LEVELS:0];
+   genvar            i;
+
+   assign stage[0] = A;   
+   generate
+      for (i=0; i<LEVELS; i=i+1)
+        begin : genbit
+           // Distance shifted by this level
+           localparam DIST = WIDTH/(2**(i+1));
+
+           mux2 #(WIDTH) mux_inst (stage[i], 
+                                   {{DIST{1'b0}}, stage[i][WIDTH-1:DIST]}, 
+                                   Shift[LEVELS-i-1], 
+                                   stage[i+1]);
+        end
+   endgenerate
+   assign Z = stage[LEVELS];   
+
+endmodule // shift_right
+
+// Logical left shifter built from $clog2(WIDTH) stages of 2:1 muxes.
+//   A     : value to shift
+//   Shift : shift amount
+//   Z     : A shifted left by Shift, zero-filled from the bottom
+// Stage i shifts by WIDTH/2^(i+1) when Shift[$clog2(WIDTH)-1-i] is set.
+module shift_left #(parameter WIDTH=8) 
+   (input  logic [WIDTH-1:0]         A,
+    input  logic [$clog2(WIDTH)-1:0] Shift,
+    output logic [WIDTH-1:0]         Z);
+   
+   localparam LEVELS = $clog2(WIDTH);
+
+   // stage[0] is the input; stage[i+1] is the output of mux level i
+   logic [WIDTH-1:0] stage [LEVELS:0];
+   genvar            i;
+   
+   assign stage[0] = A;   
+   generate
+      for (i=0; i<LEVELS; i=i+1)
+        begin : genbit
+           // Distance shifted by this level
+           localparam DIST = WIDTH/(2**(i+1));
+
+           mux2 #(WIDTH) mux_inst (stage[i], 
+                                   {stage[i][WIDTH-1-DIST:0], {DIST{1'b0}}}, 
+                                   Shift[LEVELS-i-1], 
+                                   stage[i+1]);
+        end
+   endgenerate
+   assign Z = stage[LEVELS];   
+
+endmodule // shift_left
+
+
+
+
--- a/pipelined/srt/stine/shift_left.do
+++ b/pipelined/srt/stine/shift_left.do
@ -0,0 +1,55 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this shift_left.do file to run the shift_left testbench.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do shift_left.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do shift_left.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library (start from a clean "work" library on every run)
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files: mux2, the shifters and the testbench
+vlog mux.sv shift.sv shift_left_tb.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.stimulus
+
+view wave
+
+# NOTE(review): the lines starting with "--" below are not Tcl "#" comments;
+# confirm the simulator accepts them as written.
+-- display input and output signals as hexidecimal values
+# Displays All Signals recursively
+add wave -hex -r /stimulus/*
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation 
+run 800ns
+quit
--- a/pipelined/srt/stine/shift_left_tb.sv
+++ b/pipelined/srt/stine/shift_left_tb.sv
@ -0,0 +1,71 @@
+//
+// File name : tb
+// Title     : test
+// project   : HW3
+// Library   : test
+// Purpose   : definition of modules for testbench 
+// notes :   
+//
+// Copyright Oklahoma State University
+//
+
+// Top level stimulus module
+
+`timescale 1ns/1ps
+
+`define XLEN 32
+// Testbench for shift_left: applies random operands and shift amounts and
+// logs the DUT result next to the built-in "<<" result in "shift_left.out".
+module stimulus;
+
+   logic [`XLEN-1:0]         A;        // operand
+   logic [$clog2(`XLEN)-1:0] Shift;    // shift amount
+   logic [`XLEN-1:0]         Z;        // DUT result
+   logic [`XLEN-1:0]         Z_corr;   // reference result
+
+   logic       clk;
+
+   integer     handle3;   // value returned by $fopen
+   integer     desc3;     // copy of handle3 used by $fdisplay
+   integer     i;         // vector counter
+   
+   // Instantiate part to test. WIDTH must be overridden to `XLEN; with the
+   // default WIDTH of 8 the DUT ports would not match the XLEN-wide signals.
+   shift_left #(`XLEN) dut1 (A, Shift, Z);
+   assign Z_corr = (A << Shift);   
+
+   // Free-running clock: starts high and toggles every 5 time units
+   initial 
+     begin
+        clk = 1'b1;
+        forever #5 clk = ~clk;
+     end
+   
+   // Open the log file once at time 0
+   initial
+     begin
+        handle3 = $fopen("shift_left.out");
+        desc3 = handle3;
+     end
+   
+   // Drive new random inputs on each rising edge and log on the following
+   // falling edge, then end the simulation.
+   initial
+     begin
+        for (i=0; i < 256; i=i+1)
+          begin
+             // Put vectors before beginning of clk
+             @(posedge clk)
+               begin
+                  A = $random;
+                  Shift = $random;
+               end
+             @(negedge clk)
+               begin
+                  // Log format: A Shift || Z Z_corr | match
+                  $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr));
+               end
+          end // for (i=0; i < 256; i=i+1)
+        $finish;
+     end // initial begin   
+   
+endmodule // stimulus
--- a/pipelined/srt/stine/shift_right.do
+++ b/pipelined/srt/stine/shift_right.do
@ -0,0 +1,55 @@
+# Copyright 1991-2016 Mentor Graphics Corporation
+# 
+# Modification by Oklahoma State University
+# Use with Testbench 
+# James Stine, 2008
+# Go Cowboys!!!!!!
+#
+# All Rights Reserved.
+#
+# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
+# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
+# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
+
+# Use this shift_right.do file to run the shift_right testbench.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do shift_right.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do shift_right.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library (start from a clean "work" library on every run)
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files: mux2, the shifters and the testbench
+vlog mux.sv shift.sv shift_right_tb.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.stimulus
+
+view wave
+
+# NOTE(review): the lines starting with "--" below are not Tcl "#" comments;
+# confirm the simulator accepts them as written.
+-- display input and output signals as hexidecimal values
+# Displays All Signals recursively
+add wave -hex -r /stimulus/*
+
+-- Set Wave Output Items 
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+-- Run the Simulation 
+run 800ns
+quit
--- a/pipelined/srt/stine/shift_right_tb.sv
+++ b/pipelined/srt/stine/shift_right_tb.sv
@ -0,0 +1,64 @@
+//
+// File name : tb
+// Title     : test
+// project   : HW3
+// Library   : test
+// Purpose   : definition of modules for testbench 
+// notes :   
+//
+// Copyright Oklahoma State University
+//
+
+// Top level stimulus module
+
+`timescale 1ns/1ps
+
+`define XLEN 32
+// Testbench for shift_right: applies random operands and shift amounts and
+// logs the DUT result next to the built-in ">>" result in "shift_right.out".
+module stimulus;
+
+   logic [`XLEN-1:0]         A;        // operand
+   logic [$clog2(`XLEN)-1:0] Shift;    // shift amount
+   logic [`XLEN-1:0]         Z;        // DUT result
+   logic [`XLEN-1:0]         Z_corr;   // reference result
+
+   logic       clk;
+
+   integer     handle3;   // value returned by $fopen
+   integer     desc3;     // copy of handle3 used by $fdisplay
+   integer     i;         // vector counter
+   
+   // Instantiate part to test. WIDTH must be overridden to `XLEN; with the
+   // default WIDTH of 8 the DUT ports would not match the XLEN-wide signals.
+   shift_right #(`XLEN) dut1 (A, Shift, Z);
+   assign Z_corr = (A >> Shift);   
+
+   // Free-running clock: starts high and toggles every 5 time units
+   initial 
+     begin
+        clk = 1'b1;
+        forever #5 clk = ~clk;
+     end
+   
+   // Open the log file, then end the simulation after 250 time units.
+   // This $finish fires before the 128-vector loop below can complete, so
+   // only the vectors applied before that time are logged.
+   initial
+     begin
+        handle3 = $fopen("shift_right.out");
+        desc3 = handle3;
+        #250 $finish;
+     end
+   
+   // Drive new random inputs on each rising edge and log on the following
+   // falling edge.
+   initial
+     begin
+        for (i=0; i < 128; i=i+1)
+          begin
+             // Put vectors before beginning of clk
+             @(posedge clk)
+               begin
+                  A = $random;
+                  Shift = $random;
+               end
+             @(negedge clk)
+               begin
+                  // Log format: A Shift || Z Z_corr | match
+                  $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr));
+               end
+          end // for (i=0; i < 128; i=i+1)
+     end // initial begin
+   
+endmodule // stimulus
--- a/pipelined/srt/stine/shifter.sv
+++ b/pipelined/srt/stine/shifter.sv
@ -0,0 +1,18 @@
+// 64-bit right shifter.
+//   a     : value to shift (declared signed)
+//   shamt : shift amount, 0..63
+//   y     : a >> shamt
+// NOTE(review): ">>" is the logical shift operator, so vacated bits are
+// zero-filled even though a is signed; use ">>>" if an arithmetic shift was
+// intended - confirm against the callers.
+module shifter_right(input  logic signed [63:0] a,
+                     input  logic [ 5:0]        shamt,
+                     output logic signed [63:0] y);
+
+   // A module-level assignment must be a continuous assignment
+   assign y = a >> shamt;
+
+endmodule // shifter_right
+
+// 64-bit left shifter.
+//   a     : value to shift
+//   shamt : shift amount, 0..63
+//   y     : a << shamt, zero-filled from the bottom
+module shifter_left(input  logic signed [63:0] a,
+                    input  logic [ 5:0]        shamt,
+                    output logic signed [63:0] y);
+
+   // A module-level assignment must be a continuous assignment
+   assign y = a << shamt;
+
+endmodule // shifter_left
+
--- a/pipelined/srt/stine/srt2div
+++ b/pipelined/srt/stine/srt2div
--- a/pipelined/srt/stine/srt2div.c
+++ b/pipelined/srt/stine/srt2div.c
@ -0,0 +1,114 @@
+#include "disp.h"
+
+// Quotient-digit selection (QSLC) for division by recurrence,
+// r=2 using a CPA - See 5.109 EL
+//
+// Returns +1 when prem >= 0.5, 0 when -0.5 <= prem < 0.5, and -1 otherwise.
+// D is accepted for interface symmetry but does not affect the selection.
+int qst (double D, double prem) {
+
+  (void) D;
+
+  // For Debugging
+  printf("rw --> %lg\n", prem);  
+
+  if (prem >= 0.5)
+    return 1;
+  if (prem >= -0.5)
+    return 0;
+  return -1;
+
+}
+
+/*
+ This routine performs a radix-2 SRT division 
+ algorithm.  The user inputs the numerator, the denominator, 
+ the number of iterations and the precision (in bits) used for rounding
+ and display. It assumes that 0.5 <= D < 1.
+
+ Each iteration prints the shifted residual 2*W[n], the selected multiple
+ q*D and the new residual W[n+1] = 2*W[n] - q*D, followed by the running
+ quotient. A final correction step is applied when the remainder is
+ negative. Exits with status 1 on a usage or parse error, 0 otherwise.
+        
+*/
+
+int main(int argc, char* argv[]) {
+
+   double P, N, D, Q, RQ, RD, scale;   
+   int q;
+   int num_iter, i;
+   int prec;
+   int radix = 2;
+   
+   if (argc < 5) {
+      fprintf(stderr,
+	      "Usage: %s numerator denominator num_iterations prec\n", 
+	      argv[0]);
+      exit(1);
+   }
+   // Reject arguments that do not parse instead of running on garbage
+   if (sscanf(argv[1],"%lg", &N) != 1 ||
+       sscanf(argv[2],"%lg", &D) != 1 ||
+       sscanf(argv[3],"%d", &num_iter) != 1 ||
+       sscanf(argv[4],"%d", &prec) != 1) {
+      fprintf(stderr, "%s: all four arguments must be numeric\n", argv[0]);
+      exit(1);
+   }
+   // Round to precision
+   N = rne(N, prec);
+   D = rne(D, prec);
+   printf("N = ");
+   disp_bin(N, 3, prec, stdout);
+   printf("\n");
+   printf("D = ");
+   disp_bin(D, 3, prec, stdout);
+   printf("\n");
+
+   Q = 0;
+   // Initial residual is N / radix
+   P = N * pow(2.0, -log2(radix));
+   printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n", 
+	  N, D, N/D, num_iter); 
+   for (scale = 1, i = 0; i < num_iter; i++) {
+     double shifted = radix * P;   // radix * W[n]
+
+     scale = scale * pow(2.0, -log2(radix));
+     q = qst(flr(2*D, 1), 2*P);
+     printf("2*W[n] = ");
+     disp_bin(shifted, 3, prec, stdout);
+     printf("\n");
+     printf("q*D = ");      
+     disp_bin(q*D, 3, prec, stdout);
+     printf("\n");
+     // Recurrence (done before printing so the W[n+1] line shows the new residual)
+     P = shifted - q * D;
+     Q = Q + q*scale;
+     printf("W[n+1] = ");            
+     disp_bin(P ,3, prec, stdout);
+     printf("\n");     
+     printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P); 
+     printf("i = %d, q = %d", i, q);
+     printf(", Q = ");
+     disp_bin(Q, 3, prec, stdout);
+     printf(", W = ");
+     disp_bin(P, 3, prec, stdout);
+     printf("\n\n");
+   }
+   // A negative final remainder means the quotient is one ulp too large
+   if (P < 0) {
+     Q = Q - scale;
+     P = P + D;
+     printf("\nCorrecting Negative Remainder\n");
+     printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
+     printf("Q = ");
+     disp_bin(Q, 3, prec, stdout);
+     printf(", W = ");
+     disp_bin(P, 3, prec, stdout);
+     printf("\n");
+   }
+
+   // Output Results
+   RQ = N/D;
+   // Since q_{computed} = q / radix, multiply by radix
+   RD = Q * radix;
+   printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
+   printf("true = ");
+   disp_bin(RQ, 3, prec, stdout);
+   printf(", computed = ");
+   disp_bin(RD, 3, prec, stdout);
+   printf("\n\n");
+   printf("REM = %1.18lf \n", P);
+   printf("REM = ");
+   disp_bin(P, 3, prec, stdout);
+   printf("\n\n");
+  
+   return 0;
+
+}
--- a/pipelined/srt/stine/srt4_pd.m
+++ b/pipelined/srt/stine/srt4_pd.m
@ -0,0 +1,508 @@
+%
+% PD Region for Np   = 3;  Nd   = 4;
+% w/CPA
+%
+% Clear all variables and screen
+clear
+clf
+% Define the number of bits (input Dividend)
+n = 4;
+%
+% Define Divisor Range
+% Normalized Floating Point [Dmin,Dmax] = [1,2]
+% Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
+%
+Dminimum = 1.0/2;
+Dmaximum = 2.0/2;
+% Define an ulp
+ulp = 2^(-n);
+% radix = beta
+beta  = 4;
+% rho = redundancy factor -> SHOULD ALWAYS BE >= 1/2
+%
+% SD representations have alpha < beta - 1
+%
+% alpha = ceil(beta/2)  minimally redundant  
+% alpha = beta -1       maximally redundant (rho = 1)
+% alpha = (beta-1)/2    nonredundant
+% alpha > beta - 1      over-redundant
+% 
+rho = 2/3;
+% Calculation of max digit set
+alpha = rho*(beta-1);
+% Da contains digit set
+q = [];
+for i = -alpha:alpha
+  q = [q; i];
+end
+% 4r(i-1)/D values
+hold on
+% figure(1)
+grid off
+for i = 1:length(q)
+  x = -rho+q(i):ulp:rho+q(i);
+  % Plot redundancy (overlap) Positive
+  z = [rho+q(i),rho+q(i)];
+  y = [x(length(x))-q(i),0];
+  % Plot redundancy (overlap) Negative
+  if (i ~= length(q))
+    w = [-rho+q(i+1)-q(i+1),0];
+    u = [-rho+q(i+1),-rho+q(i+1)];
+    % plot(u,w,'b')
+  end
+  % plot(x,x-q(i))
+  % plot(z,y,'r')
+
+end
+% title('Robertson Diagram for Radix-4 SRT Divison')
+
+Np   = 3;
+Nd   = 4;
+Dmin = Dminimum;
+Dmax = Dmaximum;
+ulpd = 2^(-Nd);
+ulpp = 2^(-Np);
+
+%
+% Plot Atkins P-D plot
+% Normalized Floating Point [Dmin,Dmax] = [1,2]
+% Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
+%
+Dmin = Dminimum;
+Dmax = Dmaximum;
+for i = 1:length(q)
+  D = Dmin:ulp:Dmax;
+  P1 = (rho+q(i))*D;
+  P2 = (-rho+q(i))*D;
+  hold on
+  p1 = plot(D,P1);
+  p1.Color = '#0000ff';
+  p2 = plot(D,P2);
+  p2.Color = '#ff0000';
+  axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
+  xticks(D)
+  p1.LineWidth = 2.0;
+  p2.LineWidth = 2.0;
+end
+
+% Let's make x/y axis binary
+j = [];
+for i=1:length(D)
+    j = [j disp_bin(D(i), 1, 4)];
+end
+yk = [];
+yk2 = [];
+for i=-2.5:0.5:2.5;
+    yk = [yk disp_bin(i, 3, 3)];
+    yk2 = [yk2 i];
+end
+xtickangle(90)
+xticklabels(j)
+yticklabels(yk)
+
+% Let's draw the allowed points on the PD plot
+% Positive Portions
+index = 1;
+i = 0:ulpp:rho*beta*Dmaximum;
+for j = Dmin:ulpd:Dmax
+  plot(j*ones(1,length(i)),i,'k')
+end
+
+j = Dmin:ulpd:Dmax;
+for i = 0:ulpp:rho*beta*Dmaximum
+  plot(j,i*ones(length(j)),'k')
+end
+
+% Negative Portions
+index = 1;
+i = 0:-ulpp:rho*-beta*Dmaximum;
+for j = Dmin:ulpd:Dmax
+  plot(j*ones(1,length(i)),i,'k')
+end
+
+j = Dmin:ulpd:Dmax;
+for i = 0:-ulpp:-rho*beta*Dmaximum
+  plot(j,i*ones(length(j)),'k')
+end
+
+% Labels and Printing
+xlh = xlabel(['Divisor (d)']);
+%xlh.FontSize = 18;
+xlh.Position(2) = xlh.Position(2) - 0.1;
+ylh = ylabel(['P = 4 \cdot w_i']);
+ylh.Position(1) = ylh.Position(1)-0.02;
+%ylh.FontSize = 18;
+
+% Containment Values (placed manually although not bad)
+m2 = [3/4 7/8 1.0 1.0 5/4 5/4 5/4 3/2 3/2];
+m1 = [1/4 1/4 1/4 1/4 1/2 1/2 1/2 1/2 1/2];
+m0 = [-1/4 -1/4 -1/4 -1/4 -1/2 -1/2 -1/2 -1/2 -1/2];
+m1b = [-3/4 -7/8 -1 -1 -5/4 -5/4 -5/4 -3/2 -3/2];
+x2 = Dmin:ulpd:Dmax;
+s2 = stairs(x2, m2);
+s2.Color = '#8f08d1';
+s2.LineWidth = 3.0;
+%s2.LineStyle = '--';
+s1 = stairs(x2, m1);
+s1.Color = '#8f08d1';
+s1.LineWidth = 3.0;
+s0 = stairs(x2, m0);
+s0.Color = '#8f08d1';
+s0.LineWidth = 3.0;
+s1b = stairs(x2, m1b);
+s1b.Color = '#8f08d1';
+s1b.LineWidth = 3.0;
+
+% Place manually Quotient (ugh)
+j = Dmin+ulpd/2:ulpd:Dmax;
+i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum;
+text(j(1), i(1), '2')
+text(j(1), i(2), '2')
+text(j(1), i(3), '2')
+text(j(1), i(4), '2')
+text(j(1), i(5), '2')
+text(j(1), i(6), '2')
+text(j(1), i(7), '2')
+text(j(1), i(8), '2')
+text(j(1), i(9), '2')
+text(j(1), i(10), '2')
+text(j(1), i(11), '2')
+text(j(1), i(12), '2')
+text(j(1), i(13), '2')
+text(j(1), i(14), '2')
+text(j(1), i(15), '2')
+text(j(1), i(16), '1')
+text(j(1), i(17), '1')
+text(j(1), i(18), '1')
+text(j(1), i(19), '1')
+text(j(1), i(20), '0')
+text(j(1), i(21), '0')
+text(j(1), i(22), '0')
+text(j(1), i(23), '0')
+text(j(1), i(24), '-1')
+text(j(1), i(25), '-1')
+text(j(1), i(26), '-1')
+text(j(1), i(27), '-1')
+text(j(1), i(28), '-2')
+text(j(1), i(29), '-2')
+text(j(1), i(30), '-2')
+text(j(1), i(31), '-2')
+text(j(1), i(32), '-2')
+text(j(1), i(33), '-2')
+text(j(1), i(34), '-2')
+text(j(1), i(35), '-2')
+text(j(1), i(36), '-2')
+text(j(1), i(37), '-2')
+text(j(1), i(38), '-2')
+text(j(1), i(39), '-2')
+text(j(1), i(40), '-2')
+text(j(1), i(41), '-2')
+text(j(1), i(42), '-2')
+
+text(j(2), i(1), '2')
+text(j(2), i(2), '2')
+text(j(2), i(3), '2')
+text(j(2), i(4), '2')
+text(j(2), i(5), '2')
+text(j(2), i(6), '2')
+text(j(2), i(7), '2')
+text(j(2), i(8), '2')
+text(j(2), i(9), '2')
+text(j(2), i(10), '2')
+text(j(2), i(11), '2')
+text(j(2), i(12), '2')
+text(j(2), i(13), '2')
+text(j(2), i(14), '2')
+text(j(2), i(15), '1')
+text(j(2), i(16), '1')
+text(j(2), i(17), '1')
+text(j(2), i(18), '1')
+text(j(2), i(19), '1')
+text(j(2), i(20), '0')
+text(j(2), i(21), '0')
+text(j(2), i(22), '0')
+text(j(2), i(23), '0')
+text(j(2), i(24), '-1')
+text(j(2), i(25), '-1')
+text(j(2), i(26), '-1')
+text(j(2), i(27), '-1')
+text(j(2), i(28), '-1')
+text(j(2), i(29), '-2')
+text(j(2), i(30), '-2')
+text(j(2), i(31), '-2')
+text(j(2), i(32), '-2')
+text(j(2), i(33), '-2')
+text(j(2), i(34), '-2')
+text(j(2), i(35), '-2')
+text(j(2), i(36), '-2')
+text(j(2), i(37), '-2')
+text(j(2), i(38), '-2')
+text(j(2), i(39), '-2')
+text(j(2), i(40), '-2')
+text(j(2), i(41), '-2')
+text(j(2), i(42), '-2')
+
+text(j(3), i(1), '2')
+text(j(3), i(2), '2')
+text(j(3), i(3), '2')
+text(j(3), i(4), '2')
+text(j(3), i(5), '2')
+text(j(3), i(6), '2')
+text(j(3), i(7), '2')
+text(j(3), i(8), '2')
+text(j(3), i(9), '2')
+text(j(3), i(10), '2')
+text(j(3), i(11), '2')
+text(j(3), i(12), '2')
+text(j(3), i(13), '2')
+text(j(3), i(14), '1')
+text(j(3), i(15), '1')
+text(j(3), i(16), '1')
+text(j(3), i(17), '1')
+text(j(3), i(18), '1')
+text(j(3), i(19), '1')
+text(j(3), i(20), '0')
+text(j(3), i(21), '0')
+text(j(3), i(22), '0')
+text(j(3), i(23), '0')
+text(j(3), i(24), '-1')
+text(j(3), i(25), '-1')
+text(j(3), i(26), '-1')
+text(j(3), i(27), '-1')
+text(j(3), i(28), '-1')
+text(j(3), i(29), '-1')
+text(j(3), i(30), '-2')
+text(j(3), i(31), '-2')
+text(j(3), i(32), '-2')
+text(j(3), i(33), '-2')
+text(j(3), i(34), '-2')
+text(j(3), i(35), '-2')
+text(j(3), i(36), '-2')
+text(j(3), i(37), '-2')
+text(j(3), i(38), '-2')
+text(j(3), i(39), '-2')
+text(j(3), i(40), '-2')
+text(j(3), i(41), '-2')
+text(j(3), i(42), '-2')
+
+text(j(4), i(1), '2')
+text(j(4), i(2), '2')
+text(j(4), i(3), '2')
+text(j(4), i(4), '2')
+text(j(4), i(5), '2')
+text(j(4), i(6), '2')
+text(j(4), i(7), '2')
+text(j(4), i(8), '2')
+text(j(4), i(9), '2')
+text(j(4), i(10), '2')
+text(j(4), i(11), '2')
+text(j(4), i(12), '2')
+text(j(4), i(13), '2')
+text(j(4), i(14), '1')
+text(j(4), i(15), '1')
+text(j(4), i(16), '1')
+text(j(4), i(17), '1')
+text(j(4), i(18), '1')
+text(j(4), i(19), '1')
+text(j(4), i(20), '0')
+text(j(4), i(21), '0')
+text(j(4), i(22), '0')
+text(j(4), i(23), '0')
+text(j(4), i(24), '-1')
+text(j(4), i(25), '-1')
+text(j(4), i(26), '-1')
+text(j(4), i(27), '-1')
+text(j(4), i(28), '-1')
+text(j(4), i(29), '-1')
+text(j(4), i(30), '-2')
+text(j(4), i(31), '-2')
+text(j(4), i(32), '-2')
+text(j(4), i(33), '-2')
+text(j(4), i(34), '-2')
+text(j(4), i(35), '-2')
+text(j(4), i(36), '-2')
+text(j(4), i(37), '-2')
+text(j(4), i(38), '-2')
+text(j(4), i(39), '-2')
+text(j(4), i(40), '-2')
+text(j(4), i(41), '-2')
+text(j(4), i(42), '-2')
+
+text(j(5), i(1), '2')
+text(j(5), i(2), '2')
+text(j(5), i(3), '2')
+text(j(5), i(4), '2')
+text(j(5), i(5), '2')
+text(j(5), i(6), '2')
+text(j(5), i(7), '2')
+text(j(5), i(8), '2')
+text(j(5), i(9), '2')
+text(j(5), i(10), '2')
+text(j(5), i(11), '2')
+text(j(5), i(12), '1')
+text(j(5), i(13), '1')
+text(j(5), i(14), '1')
+text(j(5), i(15), '1')
+text(j(5), i(16), '1')
+text(j(5), i(17), '1')
+text(j(5), i(18), '0')
+text(j(5), i(19), '0')
+text(j(5), i(20), '0')
+text(j(5), i(21), '0')
+text(j(5), i(22), '0')
+text(j(5), i(23), '0')
+text(j(5), i(24), '0')
+text(j(5), i(25), '0')
+text(j(5), i(26), '-1')
+text(j(5), i(27), '-1')
+text(j(5), i(28), '-1')
+text(j(5), i(29), '-1')
+text(j(5), i(30), '-1')
+text(j(5), i(31), '-1')
+text(j(5), i(32), '-2')
+text(j(5), i(33), '-2')
+text(j(5), i(34), '-2')
+text(j(5), i(35), '-2')
+text(j(5), i(36), '-2')
+text(j(5), i(37), '-2')
+text(j(5), i(38), '-2')
+text(j(5), i(39), '-2')
+text(j(5), i(40), '-2')
+text(j(5), i(41), '-2')
+text(j(5), i(42), '-2')
+
+text(j(6), i(1), '2')
+text(j(6), i(2), '2')
+text(j(6), i(3), '2')
+text(j(6), i(4), '2')
+text(j(6), i(5), '2')
+text(j(6), i(6), '2')
+text(j(6), i(7), '2')
+text(j(6), i(8), '2')
+text(j(6), i(9), '2')
+text(j(6), i(10), '2')
+text(j(6), i(11), '2')
+text(j(6), i(12), '1')
+text(j(6), i(13), '1')
+text(j(6), i(14), '1')
+text(j(6), i(15), '1')
+text(j(6), i(16), '1')
+text(j(6), i(17), '1')
+text(j(6), i(18), '0')
+text(j(6), i(19), '0')
+text(j(6), i(20), '0')
+text(j(6), i(21), '0')
+text(j(6), i(22), '0')
+text(j(6), i(23), '0')
+text(j(6), i(24), '0')
+text(j(6), i(25), '0')
+text(j(6), i(26), '-1')
+text(j(6), i(27), '-1')
+text(j(6), i(28), '-1')
+text(j(6), i(29), '-1')
+text(j(6), i(30), '-1')
+text(j(6), i(31), '-1')
+text(j(6), i(32), '-2')
+text(j(6), i(33), '-2')
+text(j(6), i(34), '-2')
+text(j(6), i(35), '-2')
+text(j(6), i(36), '-2')
+text(j(6), i(37), '-2')
+text(j(6), i(38), '-2')
+text(j(6), i(39), '-2')
+text(j(6), i(40), '-2')
+text(j(6), i(41), '-2')
+text(j(6), i(42), '-2')
+
+text(j(7), i(1), '2')
+text(j(7), i(2), '2')
+text(j(7), i(3), '2')
+text(j(7), i(4), '2')
+text(j(7), i(5), '2')
+text(j(7), i(6), '2')
+text(j(7), i(7), '2')
+text(j(7), i(8), '2')
+text(j(7), i(9), '2')
+text(j(7), i(10), '2')
+text(j(7), i(11), '2')
+text(j(7), i(12), '1')
+text(j(7), i(13), '1')
+text(j(7), i(14), '1')
+text(j(7), i(15), '1')
+text(j(7), i(16), '1')
+text(j(7), i(17), '1')
+text(j(7), i(18), '0')
+text(j(7), i(19), '0')
+text(j(7), i(20), '0')
+text(j(7), i(21), '0')
+text(j(7), i(22), '0')
+text(j(7), i(23), '0')
+text(j(7), i(24), '0')
+text(j(7), i(25), '0')
+text(j(7), i(26), '-1')
+text(j(7), i(27), '-1')
+text(j(7), i(28), '-1')
+text(j(7), i(29), '-1')
+text(j(7), i(30), '-1')
+text(j(7), i(31), '-1')
+text(j(7), i(32), '-2')
+text(j(7), i(33), '-2')
+text(j(7), i(34), '-2')
+text(j(7), i(35), '-2')
+text(j(7), i(36), '-2')
+text(j(7), i(37), '-2')
+text(j(7), i(38), '-2')
+text(j(7), i(39), '-2')
+text(j(7), i(40), '-2')
+text(j(7), i(41), '-2')
+text(j(7), i(42), '-2')
+
+text(j(8), i(1), '2')
+text(j(8), i(2), '2')
+text(j(8), i(3), '2')
+text(j(8), i(4), '2')
+text(j(8), i(5), '2')
+text(j(8), i(6), '2')
+text(j(8), i(7), '2')
+text(j(8), i(8), '2')
+text(j(8), i(9), '2')
+text(j(8), i(10), '1')
+text(j(8), i(11), '1')
+text(j(8), i(12), '1')
+text(j(8), i(13), '1')
+text(j(8), i(14), '1')
+text(j(8), i(15), '1')
+text(j(8), i(16), '1')
+text(j(8), i(17), '1')
+text(j(8), i(18), '0')
+text(j(8), i(19), '0')
+text(j(8), i(20), '0')
+text(j(8), i(21), '0')
+text(j(8), i(22), '0')
+text(j(8), i(23), '0')
+text(j(8), i(24), '0')
+text(j(8), i(25), '0')
+text(j(8), i(26), '-1')
+text(j(8), i(27), '-1')
+text(j(8), i(28), '-1')
+text(j(8), i(29), '-1')
+text(j(8), i(30), '-2')
+text(j(8), i(31), '-2')
+text(j(8), i(32), '-2')
+text(j(8), i(33), '-2')
+text(j(8), i(34), '-2')
+text(j(8), i(35), '-2')
+text(j(8), i(36), '-2')
+text(j(8), i(37), '-2')
+text(j(8), i(38), '-2')
+text(j(8), i(39), '-2')
+text(j(8), i(40), '-2')
+text(j(8), i(41), '-2')
+text(j(8), i(42), '-2')
+
+print -dpng pd_cpa.png
+
+
+
+
+
--- a/pipelined/srt/stine/srt4_pd2.m
+++ b/pipelined/srt/stine/srt4_pd2.m
@ -0,0 +1,333 @@
+% Setup for the radix-4 SRT P-D plot.
+% Start from an empty workspace and a cleared figure.
+clear
+clf
+% Number of bits (input dividend) and the matching unit in the last place.
+n   = 4;
+ulp = 2^(-n);
+% Divisor range.
+%   Normalized floating point: [Dmin, Dmax] = [1, 2]
+%   Normalized fixed point:    [Dmin, Dmax] = [1/2, 1]   (used here)
+Dminimum = 1.0/2;
+Dmaximum = 2.0/2;
+% Radix.
+beta = 4;
+% Redundancy factor rho; it should always be >= 1/2.
+% The largest digit alpha follows from rho:
+%   alpha = ceil(beta/2)   minimally redundant
+%   alpha = beta - 1       maximally redundant (rho = 1)
+%   alpha = (beta-1)/2     nonredundant
+%   alpha > beta - 1       over-redundant
+rho   = 2/3;
+alpha = rho*(beta-1);
+% Digit set -alpha..alpha, stored as a column vector.
+q = (-alpha:alpha)';
+% 4r(i-1)/D values
+% Robertson-diagram construction. Every plot call in this section is
+% commented out, so the loop only computes x, z, y, w and u (left in the
+% workspace) and draws nothing. The only lasting figure effects are
+% "hold on" and "grid off".
+hold on
+% figure(1)
+grid off
+for i = 1:length(q)
+  % Span [-rho+q(i), rho+q(i)] sampled every ulp.
+  x = -rho+q(i):ulp:rho+q(i);
+  % Plot redundancy (overlap) Positive
+  % Vertical segment at rho+q(i), from the last sample minus q(i) down to 0.
+  z = [rho+q(i),rho+q(i)];
+  y = [x(length(x))-q(i),0];
+  % Plot redundancy (overlap) Negative
+  % Skipped for the last digit because it indexes q(i+1).
+  if (i ~= length(q))
+    w = [-rho+q(i+1)-q(i+1),0];
+    u = [-rho+q(i+1),-rho+q(i+1)];
+    % plot(u,w,'b')
+  end
+  % plot(x,x-q(i))
+  % plot(z,y,'r')
+
+end
+% title('Robertson Diagram for Radix-4 SRT Division')
+
+% Grid resolution: ulpp is the step along the P axis (Np fractional bits),
+% ulpd the step along the divisor axis (Nd fractional bits).
+Np   = 3;
+Nd   = 3;
+ulpp = 2^(-Np);
+ulpd = 2^(-Nd);
+
+% P-D plot over the divisor range.
+%   Normalized floating point: [Dmin, Dmax] = [1, 2]
+%   Normalized fixed point:    [Dmin, Dmax] = [1/2, 1]
+Dmin = Dminimum;
+Dmax = Dmaximum;
+D    = Dmin:ulpd:Dmax;
+hold on
+% For every digit q(k), draw the line (rho+q(k))*D in blue and the line
+% (-rho+q(k))*D in red, both with a 2-point line width.
+for k = 1:length(q)
+  P1 = (rho+q(k))*D;
+  P2 = (-rho+q(k))*D;
+  p1 = plot(D,P1,'b');
+  p2 = plot(D,P2,'r');
+  set(p1, 'LineWidth', 2.0);
+  set(p2, 'LineWidth', 2.0);
+end
+% Fix the viewport to the divisor range and +/- beta*rho*Dmaximum, with one
+% x tick per divisor sample.
+axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
+xticks(D)
+
+% Let's make x axis binary
+% disp_bin is defined outside this script; presumably it formats a value as
+% a binary label with the given integer/fraction bit counts -- confirm.
+D = Dmin:ulpd:Dmax;
+% j collects one label per divisor sample (x ticks were set to D earlier).
+j = [];
+for i=1:length(D)
+    j = [j disp_bin(D(i), 1, 3)];
+end
+% yk collects labels for -2.5:0.5:2.5; yk2 collects the matching numeric
+% values but is not read again in this script.
+yk = [];
+yk2 = [];
+for i=-2.5:0.5:2.5;
+    yk = [yk disp_bin(i, 3, 3)];
+    yk2 = [yk2 i];
+end
+% NOTE(review): no yticks(...) call precedes yticklabels, so the y labels
+% rely on the automatic ticks landing on -2.5:0.5:2.5 -- confirm.
+xtickangle(90)
+xticklabels(j)
+yticklabels(yk)
+
+% Draw the grid of allowed points on the P-D plot, in black.
+% Vertical lines sit at every divisor sample (step ulpd). Horizontal lines
+% are stepped by ulpp outward from P = 0, first upward and then downward,
+% up to +/- rho*beta*Dmaximum.
+%
+% Each horizontal line is built with ones(1,N). The earlier ones(N) form
+% produced an N-by-N matrix, so plot() drew N coincident copies of every
+% horizontal line. The unused "index" variable has been dropped.
+Pmax  = rho*beta*Dmaximum;
+dgrid = Dmin:ulpd:Dmax;
+for sgn = [1 -1]
+  pgrid = 0:sgn*ulpp:sgn*Pmax;
+  % Vertical grid lines for this half-plane.
+  for d = dgrid
+    plot(d*ones(1,length(pgrid)),pgrid,'k');
+  end
+  % Horizontal grid lines for this half-plane.
+  for p = pgrid
+    plot(dgrid,p*ones(1,length(dgrid)),'k');
+  end
+end
+
+% Axis titles: nudge each one away from the axes, then enlarge the font.
+xlh  = xlabel('Divisor (d)');
+xpos = xlh.Position;
+xpos(2) = xpos(2) - 0.1;
+xlh.Position = xpos;
+xlh.FontSize = 18;
+ylh  = ylabel('P = 4 \cdot w_i');
+ypos = ylh.Position;
+ypos(1) = ypos(1) - 0.02;
+ylh.Position = ypos;
+ylh.FontSize = 18;
+
+% Containment values, chosen by hand: one staircase level per divisor
+% sample in x2. Presumably m2/m1/m0/m1b are the selection thresholds
+% between neighbouring quotient digits -- confirm against the labels below.
+x2  = Dmin:ulpd:Dmax;
+m2  = [5/6 1.0 5/4 11/8 11/8];
+m1  = [1/4 1/4 1/2 1/2 1/2];
+m0  = [-1/4 -1/4 -1/2 -1/2 -1/2];
+m1b = [-5/6 -1 -5/4 -11/8 -11/8];
+s2  = stairs(x2, m2);
+s1  = stairs(x2, m1);
+s0  = stairs(x2, m0);
+s1b = stairs(x2, m1b);
+% All four staircases share the same purple, 3-point style.
+set([s2 s1 s0 s1b], 'Color', '#8f08d1', 'LineWidth', 3.0);
+
+% Quotient-digit labels, assigned by hand for every grid cell.
+% j(col) is the horizontal label position of a column (half a divisor step
+% right of each divisor sample); i(row) is the label height of a row,
+% counted from the top of the plot downward.
+j = Dmin+ulpd/2:ulpd:Dmax;
+i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum;
+
+% Q(row,col) is the digit printed in that cell, stored as run lengths per
+% column (top to bottom). NaN marks the two cells of the first column that
+% carry the 'Full Precision' annotation instead of a digit.
+Q = [repelem([2 NaN 1 0 -1 NaN -2], [14 1 4 4 4 1 14]); ...
+     repelem([2 1 0 -1 -2],         [13 6 4 6 13]); ...
+     repelem([2 1 0 -1 -2],         [11 6 8 6 11]); ...
+     repelem([2 1 0 -1 -2],         [10 7 8 7 10])]';
+
+% Labels are created column by column, top to bottom.
+fp = [];
+for col = 1:size(Q,2)
+  for row = 1:size(Q,1)
+    if isnan(Q(row,col))
+      fp = [fp text(j(col), i(row), 'Full Precision', 'FontSize', 16)];
+    else
+      text(j(col), i(row), num2str(Q(row,col)))
+    end
+  end
+end
+% Handles of the upper and lower 'Full Precision' annotations.
+error1 = fp(1);
+error2 = fp(2);
+
+% Write the figure to a PNG file.
+print -dpng pd_bad.png
+
+
+
+
+
--- a/pipelined/srt/stine/srt4_pd3.m
+++ b/pipelined/srt/stine/srt4_pd3.m
@ -0,0 +1,855 @@
+% Setup for the radix-4 SRT P-D plot.
+% Start from an empty workspace and a cleared figure.
+clear
+clf
+% Number of bits (input dividend) and the matching unit in the last place.
+n   = 4;
+ulp = 2^(-n);
+% Divisor range.
+%   Normalized floating point: [Dmin, Dmax] = [1, 2]
+%   Normalized fixed point:    [Dmin, Dmax] = [1/2, 1]   (used here)
+Dminimum = 1.0/2;
+Dmaximum = 2.0/2;
+% Radix.
+beta = 4;
+% Redundancy factor rho; it should always be >= 1/2.
+% The largest digit alpha follows from rho:
+%   alpha = ceil(beta/2)   minimally redundant
+%   alpha = beta - 1       maximally redundant (rho = 1)
+%   alpha = (beta-1)/2     nonredundant
+%   alpha > beta - 1       over-redundant
+rho   = 2/3;
+alpha = rho*(beta-1);
+% Digit set -alpha..alpha, stored as a column vector.
+q = (-alpha:alpha)';
+% 4r(i-1)/D values
+% Robertson-diagram construction. Every plot call in this section is
+% commented out, so the loop only computes x, z, y, w and u (left in the
+% workspace) and draws nothing. The only lasting figure effects are
+% "hold on" and "grid off".
+hold on
+% figure(1)
+grid off
+for i = 1:length(q)
+  % Span [-rho+q(i), rho+q(i)] sampled every ulp.
+  x = -rho+q(i):ulp:rho+q(i);
+  % Plot redundancy (overlap) Positive
+  % Vertical segment at rho+q(i), from the last sample minus q(i) down to 0.
+  z = [rho+q(i),rho+q(i)];
+  y = [x(length(x))-q(i),0];
+  % Plot redundancy (overlap) Negative
+  % Skipped for the last digit because it indexes q(i+1).
+  if (i ~= length(q))
+    w = [-rho+q(i+1)-q(i+1),0];
+    u = [-rho+q(i+1),-rho+q(i+1)];
+    % plot(u,w,'b')
+  end
+  % plot(x,x-q(i))
+  % plot(z,y,'r')
+
+end
+% title('Robertson Diagram for Radix-4 SRT Division')
+
+% P-D plot over the divisor range.
+%   Normalized floating point: [Dmin, Dmax] = [1, 2]
+%   Normalized fixed point:    [Dmin, Dmax] = [1/2, 1]
+Dmin = Dminimum;
+Dmax = Dmaximum;
+% Divisor samples, one per ulp. D is reused afterwards for the x tick labels.
+D    = Dmin:ulp:Dmax;
+hold on
+% For every digit q(k), draw the line (rho+q(k))*D in blue and the line
+% (-rho+q(k))*D in red, both with a 2-point line width.
+for k = 1:length(q)
+  P1 = (rho+q(k))*D;
+  P2 = (-rho+q(k))*D;
+  p1 = plot(D,P1,'b');
+  p2 = plot(D,P2,'r');
+  set(p1, 'LineWidth', 2.0);
+  set(p2, 'LineWidth', 2.0);
+end
+% Fix the viewport to the divisor range and +/- beta*rho*Dmaximum, with one
+% x tick per divisor sample.
+axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
+xticks(D)
+
+% Let's make x axis binary
+% disp_bin is defined outside this script; presumably it formats a value as
+% a binary label with the given integer/fraction bit counts -- confirm.
+% D is the divisor sample vector left over from the P-D line loop above.
+j = [];
+for i=1:length(D)
+    j = [j disp_bin(D(i), 1, 4)];
+end
+% yk collects labels for -2.5:0.5:2.5; yk2 collects the matching numeric
+% values but is not read again in this script.
+yk = [];
+yk2 = [];
+for i=-2.5:0.5:2.5;
+    yk = [yk disp_bin(i, 3, 4)];
+    yk2 = [yk2 i];
+end
+% NOTE(review): no yticks(...) call precedes yticklabels, so the y labels
+% rely on the automatic ticks landing on -2.5:0.5:2.5 -- confirm.
+xtickangle(90)
+xticklabels(j)
+yticklabels(yk)
+
+% Grid resolution: ulpp is the step along the P axis (Np fractional bits),
+% ulpd the step along the divisor axis (Nd fractional bits).
+Np   = 4;
+Nd   = 4;
+Dmin = Dminimum;
+Dmax = Dmaximum;
+ulpd = 2^(-Nd);
+ulpp = 2^(-Np);
+
+% Draw the grid of allowed points on the P-D plot, in black.
+% Vertical lines sit at every divisor sample (step ulpd). Horizontal lines
+% are stepped by ulpp outward from P = 0, first upward and then downward,
+% up to +/- rho*beta*Dmaximum.
+%
+% Each horizontal line is built with ones(1,N). The earlier ones(N) form
+% produced an N-by-N matrix, so plot() drew N coincident copies of every
+% horizontal line. The unused "index" variable has been dropped.
+Pmax  = rho*beta*Dmaximum;
+dgrid = Dmin:ulpd:Dmax;
+for sgn = [1 -1]
+  pgrid = 0:sgn*ulpp:sgn*Pmax;
+  % Vertical grid lines for this half-plane.
+  for d = dgrid
+    plot(d*ones(1,length(pgrid)),pgrid,'k');
+  end
+  % Horizontal grid lines for this half-plane.
+  for p = pgrid
+    plot(dgrid,p*ones(1,length(dgrid)),'k');
+  end
+end
+
+% Axis titles: nudge each one away from the axes. The font size is left at
+% its default here.
+xlh  = xlabel('Divisor (d)');
+xpos = xlh.Position;
+xpos(2) = xpos(2) - 0.1;
+xlh.Position = xpos;
+ylh  = ylabel('P = 4 \cdot w_i');
+ypos = ylh.Position;
+ypos(1) = ypos(1) - 0.02;
+ylh.Position = ypos;
+
+% Containment values, chosen by hand: one staircase level per divisor
+% sample in x2. Presumably m2/m1/m0/m1b are the selection thresholds
+% between neighbouring quotient digits -- confirm against the labels below.
+x2  = Dmin:ulpd:Dmax;
+m2  = [3/4 7/8 15/16 1.0 9/8 19/16 5/4 6/4 6/4];
+m1  = [1/4 1/4 1/4 1/4 3/8 3/8 1/2 1/2 1/2];
+m0  = [-1/4 -3/8 -3/8 -3/8 -1/2 -1/2 -1/2 -1/2 -1/2];
+m1b = [-13/16 -15/16 -1 -9/8 -5/4 -5/4 -11/8 -6/4 -6/4];
+s2  = stairs(x2, m2);
+s1  = stairs(x2, m1);
+s0  = stairs(x2, m0);
+s1b = stairs(x2, m1b);
+% All four staircases share the same purple, 3-point style.
+set([s2 s1 s0 s1b], 'Color', '#8f08d1', 'LineWidth', 3.0);
+
+% Quotient-digit labels, assigned by hand for every grid cell.
+% j(col) is the horizontal label position of a column (half a divisor step
+% right of each divisor sample); i(row) is the label height of a row,
+% counted from the top of the plot downward.
+j = Dmin+ulpd/2:ulpd:Dmax;
+i = rho*beta*Dmaximum-ulpp:-ulpp:-rho*beta*Dmaximum;
+
+% Q(row,col) is the digit printed in that cell, stored as run lengths per
+% column (top to bottom): how many rows show 2, 1, 0, -1 and -2.
+Q = [repelem([2 1 0 -1 -2], [30  8  8  9 29]); ...
+     repelem([2 1 0 -1 -2], [28 10 10  9 27]); ...
+     repelem([2 1 0 -1 -2], [27 11 10 10 26]); ...
+     repelem([2 1 0 -1 -2], [26 12 10 12 24]); ...
+     repelem([2 1 0 -1 -2], [24 12 14 12 22]); ...
+     repelem([2 1 0 -1 -2], [23 13 14 12 22]); ...
+     repelem([2 1 0 -1 -2], [22 12 16 14 20]); ...
+     repelem([2 1 0 -1 -2], [18 16 16 16 18])]';
+
+% Labels are created column by column, top to bottom.
+for col = 1:size(Q,2)
+  for row = 1:size(Q,1)
+    text(j(col), i(row), num2str(Q(row,col)))
+  end
+end
+
+% Write the figure to a PNG file in landscape orientation.
+orient('landscape')
+print -dpng 'pd_csa.png'
+
+
+
+
+
--- a/pipelined/srt/stine/srt4div
+++ b/pipelined/srt/stine/srt4div
--- a/pipelined/srt/stine/srt4div.c
+++ b/pipelined/srt/stine/srt4div.c
@ -0,0 +1,226 @@
+#include "disp.h"
+#include <math.h>
+
+// QSLC is the quotient-digit selection for division by recurrence
+// with r=4 using a CPA - See Table 5.9 EL
+//
+//   prem : shifted partial remainder as passed by the caller
+//          (main scales it by 8 and truncates it before the call)
+//   d    : divisor as passed by the caller (main scales it by 16),
+//          expected to lie in [8, 16)
+//
+// Returns the selected digit q in {-2,-1,0,1,2}.  The selection
+// constants used here are symmetric about zero, so only the two
+// positive thresholds are stored for each unit-wide interval of d.
+// If d is outside [8, 16) (NaN included) no table row applies: a
+// message is written to stderr and 0 is returned.
+int qslc (double prem, double d) {
+
+  // {threshold for |q|=2, threshold for |q|=1}
+  static const double threshold[8][2] = {
+    { 6.0, 2.0},   //  8 <= d <  9
+    { 7.0, 2.0},   //  9 <= d < 10
+    { 8.0, 2.0},   // 10 <= d < 11
+    { 8.0, 2.0},   // 11 <= d < 12
+    {10.0, 4.0},   // 12 <= d < 13
+    {10.0, 4.0},   // 13 <= d < 14
+    {10.0, 4.0},   // 14 <= d < 15
+    {12.0, 4.0}    // 15 <= d < 16
+  };
+  double m2, m1;
+
+  // For Debugging
+  printf("d  --> %lg\n", d);
+  printf("rw --> %lg\n", prem);
+
+  // Written as a negated range test so that NaN is rejected too
+  if (!((d>=8.0)&&(d<16.0))) {
+    fprintf(stderr, "qslc: divisor %lg is outside [8,16)\n", d);
+    return 0;
+  }
+
+  // d is in [8,16), so truncation yields a row index 0..7
+  m2 = threshold[(int)d - 8][0];
+  m1 = threshold[(int)d - 8][1];
+
+  if (prem >= m2)
+    return 2;
+  if (prem >= m1)
+    return 1;
+  if (prem >= -m1)
+    return 0;
+  if (prem >= -m2)
+    return -1;
+  return -2;
+
+}
+
+
+/*
+ This routine performs a radix-4 SRT division
+ algorithm.  The user inputs the numerator, the denominator,
+ the number of iterations and the precision used for rounding
+ and display. It assumes that 0.5 <= D < 1.
+
+ Returns 0 on success and 1 on a usage or argument-parsing error.
+*/
+
+int main(int argc, char* argv[]) {
+
+   double P, N, D, Q, RQ, RD, scale;
+   int q;
+   int num_iter, i;
+   int prec;
+   int radix = 4;
+
+   if (argc < 5) {
+      fprintf(stderr,
+	      "Usage: %s numerator denominator num_iterations prec\n",
+	      argv[0]);
+      return 1;
+   }
+   // Reject unparsable arguments instead of running on
+   // uninitialised values
+   if (sscanf(argv[1],"%lg", &N) != 1 ||
+       sscanf(argv[2],"%lg", &D) != 1 ||
+       sscanf(argv[3],"%d", &num_iter) != 1 ||
+       sscanf(argv[4],"%d", &prec) != 1) {
+      fprintf(stderr,
+	      "Usage: %s numerator denominator num_iterations prec\n",
+	      argv[0]);
+      return 1;
+   }
+   // Round to precision
+   N = rne(N, prec);
+   D = rne(D, prec);
+   printf("N = ");
+   disp_bin(N, 3, prec, stdout);
+   printf("\n");
+   printf("D = ");
+   disp_bin(D, 3, prec, stdout);
+   printf("\n");
+
+   Q = 0;
+   // Initial residual is N / radix
+   P = N * pow(2.0, -log2(radix));
+   printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n",
+	  N, D, N/D, num_iter);
+   for (scale = 1, i = 0; i < num_iter; i++) {
+     // Shift by r
+     scale = scale * pow(2.0, -log2(radix));
+     // (4*P)*8 because of footnote in Table 5.9, page 296 EL
+     // i.e., real value = shown value / 8
+     // D*16 since we use 4 bits of D (1 bit known)
+     q = qslc(flr((radix * P) * 8, 3), D*16);
+     printf("4*W[n] = ");
+     disp_bin(radix*P, 3, prec, stdout);
+     printf("\n");
+     printf("q*D = ");
+     disp_bin(q*D, 3, prec, stdout);
+     printf("\n");
+     // Recurrence (applied before the next print so that the line
+     // labelled W[n+1] really shows the updated residual)
+     P = radix * P - q * D;
+     printf("W[n+1] = ");
+     disp_bin(P ,3, prec, stdout);
+     printf("\n");
+     // OTFC
+     Q = Q + q * scale;
+     printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P);
+     printf("i = %d, q = %d", i, q);
+     printf(", Q = ");
+     disp_bin(Q, 3, prec, stdout);
+     printf(", W = ");
+     disp_bin(P, 3, prec, stdout);
+     printf("\n\n");
+   }
+   // Is shifted partial remainder negative?
+   if (P < 0) {
+     Q = Q - pow(2.0, -prec);
+     P = P + D;
+     printf("\nCorrecting Negative Remainder\n");
+     printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
+     printf("Q = ");
+     disp_bin(Q, 3, prec, stdout);
+     printf(", W = ");
+     disp_bin(P, 3, prec, stdout);
+     printf("\n");
+   }
+
+   // Output Results
+   RQ = flr(N/D, prec);
+   // Since q_{computed} = q / radix, multiply by radix
+   RD = Q * radix;
+   printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
+   printf("true = ");
+   disp_bin(RQ, 3, prec, stdout);
+   printf(", computed = ");
+   disp_bin(RD, 3, prec, stdout);
+   printf("\n\n");
+   printf("REM = %1.18lf \n", P);
+   printf("REM = ");
+   disp_bin(P, 3, prec, stdout);
+   printf("\n\n");
+
+   return 0;
+
+}
--- a/pipelined/srt/stine/test_iter128.sv
+++ b/pipelined/srt/stine/test_iter128.sv
@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // Random-vector testbench for a 128-bit intdiv instance with S=0; results are logged to iter128_unsigned.out
+
+   logic [127:0]  N, D; // operands driven into the DUT
+   logic 	  clk;
+   logic 	  reset;   
+   logic 	  start;
+   logic 	  S;   // driven to 0 below; presumably selects unsigned mode (log file is named *_unsigned) - confirm against intdiv
+   
+   logic [127:0]   Q; // DUT outputs: Q, rem0, div0, done
+   logic [127:0]  rem0;
+   logic 	 div0;
+   logic 	 done; // connected to the DUT but not consulted by this testbench
+   
+   integer 	 handle3; // value returned by $fopen
+   integer 	 desc3; // copy of handle3 used by $fdisplay
+   integer 	 i;   
+
+   logic [127:0]  Ncomp; // reference-model operands and results
+   logic [127:0]  Dcomp;
+   logic [127:0]  Qcomp;
+   logic [127:0]  Rcomp;   
+   
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors;   // number of quotient mismatches
+   
+   intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+   
+   initial // free-running clock, period of 10 time units
+     begin	
+	clk = 1'b0;
+	forever #5 clk = ~clk;
+     end
+
+   initial // counters and log file
+     begin
+	vectornum = 0;
+	errors = 0;	
+	handle3 = $fopen("iter128_unsigned.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): the body ends in $finish, so it runs once after the first trigger; an initial block looks like the intent
+     begin
+	desc3 = handle3;	
+	#0  start = 1'b0;
+	#0  S = 1'b0;	
+	#0  reset = 1'b1; // reset stays high for 30 time units
+	#30 reset = 1'b0;
+	#30 N = 128'h0;
+	#0  D = 128'h0;	
+	for (i=0; i<`IDIV_TESTS; i=i+1)
+	  begin
+	     N = {$urandom(), $urandom(), $urandom(), $urandom()}; // four 32-bit random words per operand
+	     D = {$urandom(), $urandom(), $urandom(), $urandom()};
+	     start <= 1'b1;
+	     // Wait 2 cycles (to be sure)
+	     repeat (2)
+	       @(posedge clk);
+	     start <= 1'b0;	     
+	     repeat (41) // fixed 41-cycle wait before sampling the outputs
+	       @(posedge clk);
+	     Ncomp = N;
+	     Dcomp = D;
+	     Qcomp = Ncomp/Dcomp; // reference quotient from the simulator's own operator
+	     Rcomp = Ncomp%Dcomp; // reference remainder
+	     vectornum = vectornum + 1;
+	       if ((Q !== Qcomp)) begin // only the quotient contributes to the error count
+	       errors = errors + 1;
+	     end
+	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
+		       N, D, Q, rem0, Qcomp, Rcomp, 
+		       (Q==Qcomp), (rem0==Rcomp)); // the remainder comparison is logged but not counted
+	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
+	$display("%d tests completed, %d errors", vectornum, errors);
+	$finish;	
+     end 
+
+endmodule // tb
--- a/pipelined/srt/stine/test_iter128S.sv
+++ b/pipelined/srt/stine/test_iter128S.sv
@ -0,0 +1,90 @@
+`include "idiv-config.vh"
+
+module tb; // Random-vector testbench for a 128-bit intdiv instance with S=1; results are logged to iter128_signed.out
+
+   logic [127:0]  N, D; // operands driven into the DUT
+   logic 	  clk;
+   logic 	  reset;   
+   logic 	  start;
+   logic 	  S;   // driven to 1 below; presumably selects signed mode (reference model uses $signed) - confirm against intdiv
+   
+   logic [127:0]   Q; // DUT outputs: Q, rem0, div0, done
+   logic [127:0]  rem0;
+   logic 	 div0;
+   logic 	 done; // connected to the DUT but not consulted by this testbench
+   
+   integer 	 handle3; // value returned by $fopen
+   integer 	 desc3; // copy of handle3 used by $fdisplay
+   integer 	 i;   
+
+   logic [31:0]  rnd1; // rnd1 and rnd2 are not referenced anywhere else in this module
+   logic [31:0]  rnd2;            
+   logic [127:0] Ncomp; // reference-model operands and results
+   logic [127:0] Dcomp;
+   logic [127:0] Qcomp;
+   logic [127:0] Rcomp;
+   
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors;   // number of quotient mismatches
+   
+   intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+   
+   initial // free-running clock, period of 10 time units
+     begin	
+	clk = 1'b0;
+	forever #5 clk = ~clk;
+     end
+
+   initial // counters and log file
+     begin
+	vectornum = 0;
+	errors = 0;	
+	handle3 = $fopen("iter128_signed.out");
+     end
+
+   /*
+   // VCD generation for power estimation
+   initial
+     begin
+        $dumpfile("iter128_signed.vcd");
+	$dumpvars (0,tb.dut);	
+     end
+    */      
+
+   always @(posedge clk, posedge reset) // NOTE(review): the body ends in $finish, so it runs once after the first trigger; an initial block looks like the intent
+     begin
+	desc3 = handle3;	
+	#0  start = 1'b0;
+	#0  S = 1'b1;	
+	#0  reset = 1'b1; // reset stays high for 30 time units
+	#30 reset = 1'b0;
+	#30 N = 128'h0;
+	#0  D = 128'h0;	
+	for (i=0; i<`IDIV_TESTS; i=i+1)
+	  begin
+	     N = {$urandom(), $urandom(), $urandom(), $urandom()}; // four 32-bit random words per operand
+	     D = {$urandom(), $urandom(), $urandom(), $urandom()};		
+	     start <= 1'b1;
+	     // Hold start high for 1 clock cycle
+	     repeat (1)
+	       @(posedge clk);
+	     start <= 1'b0;	     
+	     repeat (65) // fixed 65-cycle wait before sampling the outputs
+	       @(posedge clk);
+	     Ncomp = N;
+	     Dcomp = D;
+	     Qcomp = $signed(Ncomp)/$signed(Dcomp); // signed reference quotient
+	     Rcomp = $signed(Ncomp)%$signed(Dcomp);	     // signed reference remainder
+	     vectornum = vectornum + 1;
+	       if ((Q !== Qcomp)) begin // only the quotient contributes to the error count
+	       errors = errors + 1;
+	     end
+	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
+		       N, D, Q, rem0, Qcomp, Rcomp, 
+		       (Q==Qcomp), (rem0==Rcomp)); // the remainder comparison is logged but not counted
+	  end 
+	$display("%d tests completed, %d errors", vectornum, errors);
+	$finish;	
+     end 
+
+endmodule // tb
--- a/pipelined/srt/stine/test_iter32.sv
+++ b/pipelined/srt/stine/test_iter32.sv
@ -0,0 +1,85 @@
+`include "idiv-config.vh"
+
+module tb; // Random-vector testbench for a 32-bit intdiv instance with S=0; results are logged to iter32_unsigned.out
+
+   logic [31:0]  N, D; // operands driven into the DUT
+   logic 	 clk;
+   logic 	 reset;   
+   logic 	 start;
+   logic 	 S;   // driven to 0 below; presumably selects unsigned mode (log file is named *_unsigned) - confirm against intdiv
+   
+   logic [31:0]  Q; // DUT outputs: Q, rem0, div0, done
+   logic [31:0]  rem0;
+   logic 	 div0;
+   logic 	 done; // connected to the DUT but not consulted by this testbench
+   
+   integer 	 handle3; // value returned by $fopen
+   integer 	 desc3; // copy of handle3 used by $fdisplay
+   integer 	 i;   
+   
+   logic [31:0]  Ncomp; // reference-model operands and results
+   logic [31:0]  Dcomp;
+   logic [31:0]  Qcomp;
+   logic [31:0]  Rcomp;   
+   
+   logic [31:0]  vectornum;     // number of vectors applied
+   logic [31:0]  errors;   // number of quotient mismatches
+
+   intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+   
+   initial // free-running clock, period of 10 time units
+     begin	
+	clk = 1'b0;
+	forever #5 clk = ~clk;
+     end
+
+   initial // counters and log file
+     begin
+	vectornum = 0;
+	errors = 0;	
+	handle3 = $fopen("iter32_unsigned.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): the body ends in $finish, so it runs once after the first trigger; an initial block looks like the intent
+     begin
+	desc3 = handle3;	
+	#0  start = 1'b0;
+	#0  S = 1'b0;	
+	#0  reset = 1'b1; // reset stays high for 30 time units
+	#30 reset = 1'b0;
+	#30 N = 32'h0;
+	#0  D = 32'h0;		
+	for (i=0; i<`IDIV_TESTS; i=i+1)
+	  begin
+	     N = $urandom; // one 32-bit random word per operand
+	     D = $urandom;
+	     start <= 1'b1;
+	     // Wait 2 cycles (to be sure)
+	     repeat (2)
+	       @(posedge clk);
+	     start <= 1'b0;
+	     repeat (41) // fixed 41-cycle wait before sampling the outputs
+	       @(posedge clk);
+	     Ncomp = N;
+	     Dcomp = D;
+	     Qcomp = Ncomp/Dcomp; // reference quotient from the simulator's own operator
+	     Rcomp = Ncomp%Dcomp; // reference remainder
+	     if ((Q !== Qcomp)) begin // only the quotient contributes to the error count
+	       errors = errors + 1;
+	     end
+	     vectornum = vectornum + 1;	     
+	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
+		       N, D, Q, rem0, Qcomp, Rcomp, 
+		       (Q==Qcomp), (rem0==Rcomp)); // the remainder comparison is logged but not counted
+	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
+	$display("%d tests completed, %d errors", vectornum, errors);
+	$finish;	
+     end 
+
+endmodule // tb
+
+
+
+
+
+
--- a/pipelined/srt/stine/test_iter32S.sv
+++ b/pipelined/srt/stine/test_iter32S.sv
@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // Random-vector testbench for a 32-bit intdiv instance with S=1; results are logged to iter32_signed.out
+
+   logic [31:0]  N, D; // operands driven into the DUT
+   logic 	 clk;
+   logic 	 reset;   
+   logic 	 start;
+   logic 	 S;   // driven to 1 below; presumably selects signed mode (reference model uses $signed) - confirm against intdiv
+   
+   logic [31:0]  Q; // DUT outputs: Q, rem0, div0, done
+   logic [31:0]  rem0;
+   logic 	 div0;
+   logic 	 done; // connected to the DUT but not consulted by this testbench
+   
+   integer 	 handle3; // value returned by $fopen
+   integer 	 desc3; // copy of handle3 used by $fdisplay
+   integer 	 i;   
+
+   logic [31:0]  Ncomp; // reference-model operands and results
+   logic [31:0]  Dcomp;
+   logic [31:0]  Qcomp;
+   logic [31:0]  Rcomp;   
+   
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors;   // number of quotient mismatches
+
+   intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+   
+   initial // free-running clock, period of 10 time units
+     begin	
+	clk = 1'b0;
+	forever #5 clk = ~clk;
+     end
+
+   initial // counters and log file
+     begin
+	vectornum = 0;
+	errors = 0;	
+	handle3 = $fopen("iter32_signed.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): the body ends in $finish, so it runs once after the first trigger; an initial block looks like the intent
+     begin
+	desc3 = handle3;	
+	#0  start = 1'b0;
+	#0  S = 1'b1;	
+	#0  reset = 1'b1; // reset stays high for 30 time units
+	#30 reset = 1'b0;
+	#30 N = 32'h0;
+	#0  D = 32'h0;	
+	for (i=0; i<`IDIV_TESTS; i=i+1)
+	  begin
+	     N = $urandom; // one 32-bit random word per operand
+	     D = $urandom;
+	     start <= 1'b1;
+	     // Wait 2 cycles (to be sure)
+	     repeat (2)
+	       @(posedge clk);
+	     start <= 1'b0;
+	     repeat (41) // fixed 41-cycle wait before sampling the outputs
+	       @(posedge clk);
+	     Ncomp = N;
+	     Dcomp = D;
+	     Qcomp = $signed(Ncomp)/$signed(Dcomp); // signed reference quotient
+	     Rcomp = $signed(Ncomp)%$signed(Dcomp); // signed reference remainder
+	       if ((Q !== Qcomp)) begin // only the quotient contributes to the error count
+	       errors = errors + 1;
+	     end
+	     vectornum = vectornum + 1;	     
+	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
+		       N, D, Q, rem0, Qcomp, Rcomp, 
+		       (Q==Qcomp), (rem0==Rcomp)); // the remainder comparison is logged but not counted
+	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
+	$display("%d tests completed, %d errors", vectornum, errors);	
+	$finish;	
+     end 
+
+endmodule // tb
--- a/pipelined/srt/stine/test_iter64.sv
+++ b/pipelined/srt/stine/test_iter64.sv
@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // Random-vector testbench for a 64-bit intdiv instance with S=0; results are logged to iter64_unsigned.out
+
+   logic [63:0]  N, D; // operands driven into the DUT
+   logic 	 clk;
+   logic 	 reset;   
+   logic 	 start;
+   logic 	 S;   // driven to 0 below; presumably selects unsigned mode (log file is named *_unsigned) - confirm against intdiv
+   
+   logic [63:0]  Q; // DUT outputs: Q, rem0, div0, done
+   logic [63:0]  rem0;
+   logic 	 div0;
+   logic 	 done; // connected to the DUT but not consulted by this testbench
+   
+   integer 	 handle3; // value returned by $fopen
+   integer 	 desc3; // copy of handle3 used by $fdisplay
+   integer 	 i;   
+
+   logic [63:0]  Ncomp; // reference-model operands and results
+   logic [63:0]  Dcomp;
+   logic [63:0]  Qcomp;
+   logic [63:0]  Rcomp;   
+   
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors;   // number of quotient mismatches
+   
+   intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+   
+   initial // free-running clock, period of 10 time units
+     begin	
+	clk = 1'b0;
+	forever #5 clk = ~clk;
+     end
+
+   initial // counters and log file
+     begin
+	vectornum = 0;
+	errors = 0;	
+	handle3 = $fopen("iter64_unsigned.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): the body ends in $finish, so it runs once after the first trigger; an initial block looks like the intent
+     begin
+	desc3 = handle3;	
+	#0  start = 1'b0;
+	#0  S = 1'b0;	
+	#0  reset = 1'b1; // reset stays high for 30 time units
+	#30 reset = 1'b0;
+	#30 N = 64'h0;
+	#0  D = 64'h0;	
+	for (i=0; i<`IDIV_TESTS; i=i+1)
+	  begin
+	     N = {$urandom(), $urandom()}; // two 32-bit random words per operand
+	     D = {$urandom(), $urandom()};
+	     start <= 1'b1;
+	     // Wait 2 cycles (to be sure)
+	     repeat (2)
+	       @(posedge clk);
+	     start <= 1'b0;	     
+	     repeat (41) // fixed 41-cycle wait before sampling the outputs
+	       @(posedge clk);
+	     Ncomp = N;
+	     Dcomp = D;
+	     Qcomp = Ncomp/Dcomp; // reference quotient from the simulator's own operator
+	     Rcomp = Ncomp%Dcomp; // reference remainder
+	     vectornum = vectornum + 1;
+	       if ((Q !== Qcomp)) begin // only the quotient contributes to the error count
+	       errors = errors + 1;
+	     end
+	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
+		       N, D, Q, rem0, Qcomp, Rcomp, 
+		       (Q==Qcomp), (rem0==Rcomp)); // the remainder comparison is logged but not counted
+	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
+	$display("%d tests completed, %d errors", vectornum, errors);
+	$finish;	
+     end 
+
+endmodule // tb
--- a/pipelined/srt/stine/test_iter64S.sv
+++ b/pipelined/srt/stine/test_iter64S.sv
@ -0,0 +1,79 @@
+`include "idiv-config.vh"
+
+module tb; // Random-vector testbench for a 64-bit intdiv instance with S=1; results are logged to iter64_signed.out
+
+   logic [63:0]  N, D; // operands driven into the DUT
+   logic 	 clk;
+   logic 	 reset;   
+   logic 	 start;
+   logic 	 S;   // driven to 1 below; presumably selects signed mode (reference model uses $signed) - confirm against intdiv
+   
+   logic [63:0]  Q; // DUT outputs: Q, rem0, div0, done
+   logic [63:0]  rem0;
+   logic 	 div0;
+   logic 	 done; // connected to the DUT but not consulted by this testbench
+   
+   integer 	 handle3; // value returned by $fopen
+   integer 	 desc3; // copy of handle3 used by $fdisplay
+   integer 	 i;   
+
+   logic [63:0]  Ncomp; // reference-model operands and results
+   logic [63:0]  Dcomp;
+   logic [63:0]  Qcomp;
+   logic [63:0]  Rcomp;
+   
+   logic [31:0]  vectornum; // number of vectors applied
+   logic [31:0]  errors;   // number of quotient mismatches
+
+   intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
+   
+   initial // free-running clock, period of 10 time units
+     begin	
+	clk = 1'b0;
+	forever #5 clk = ~clk;
+     end
+
+   initial // counters and log file
+     begin
+	vectornum = 0;
+	errors = 0;
+	handle3 = $fopen("iter64_signed.out");
+     end
+
+   always @(posedge clk, posedge reset) // NOTE(review): the body ends in $finish, so it runs once after the first trigger; an initial block looks like the intent
+     begin
+	desc3 = handle3;	
+	#0  start = 1'b0;
+	#0  S = 1'b1;	
+	#0  reset = 1'b1; // reset stays high for 30 time units
+	#30 reset = 1'b0;
+	#30 N = 64'h0;
+	#0  D = 64'h0;	
+	for (i=0; i<`IDIV_TESTS; i=i+1)
+	  begin
+	     N = {$urandom(), $urandom()}; // two 32-bit random words per operand
+	     D = {$urandom(), $urandom()};	     
+	     start <= 1'b1;
+	     // Wait 2 cycles (to be sure)
+	     repeat (2)
+	       @(posedge clk);
+	     start <= 1'b0;	     
+	     repeat (41) // fixed 41-cycle wait before sampling the outputs
+	       @(posedge clk);
+	     Ncomp = N;
+	     Dcomp = D;
+	     Qcomp = $signed(Ncomp)/$signed(Dcomp); // signed reference quotient
+	     Rcomp = $signed(Ncomp)%$signed(Dcomp); // signed reference remainder
+	     if ((Q !== Qcomp)) begin // only the quotient contributes to the error count
+	       errors = errors + 1;
+	     end
+	     vectornum = vectornum + 1;	     
+	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
+		       N, D, Q, rem0, Qcomp, Rcomp, 
+		       (Q==Qcomp), (rem0==Rcomp)); // the remainder comparison is logged but not counted
+	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
+	$display("%d tests completed, %d errors", vectornum, errors);
+	$finish;
+     end 
+
+endmodule // tb
--- a/pipelined/srt/stine/tmp
+++ b/pipelined/srt/stine/tmp
--- a/pipelined/srt/testbench-radix4.sv
+++ b/pipelined/srt/testbench-radix4.sv
@ -0,0 +1,122 @@
+
+`include "wally-config.vh"
+`define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF)
+
+///////////
+// clock //
+///////////
+// Declares a single output port, clk, and contains no logic
+// that drives it.
+module clock(clk);
+  // Port direction and data type declared together
+  output logic clk;
+endmodule
+
+//////////
+// testbench //
+//////////
+module testbenchradix4; // File-driven testbench: applies vectors from "testvectors" to srtradix4 and checks each result to within 1 ulp
+  logic              clk;
+  logic              req; // drives DivStart; also gates loading of the next vector
+  logic              DivDone;
+  logic [63:0]       a, b; // 64-bit operands taken from the current vector
+  logic [51:0]       afrac, bfrac; // a and b unpacked as {1-bit sign, 11-bit exponent, 52-bit fraction}
+  logic [10:0]       aExp, bExp;
+  logic              asign, bsign;
+  logic [51:0]       r, rOTFC; // rOTFC is not referenced elsewhere in this module
+  logic [`DIVLEN-1:0]  Quot, QuotOTFC; // QuotOTFC is not referenced elsewhere in this module
+  logic [54:0]       rp, rm;   // positive quotient digits
+ 
+  // Test parameters
+  parameter MEM_SIZE = 40000;
+  parameter MEM_WIDTH = 64+64+64;
+ 
+  `define memr  63:0
+  `define memb  127:64
+  `define mema  191:128
+
+  // Test registers
+  logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
+  logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
+                            // bit field of an array 
+  logic [63:0] correctr, nextr, diffn, diffp; // expected result for the current and next vector, and the two signed differences
+  logic [10:0] DivExp;
+  logic        DivSgn;
+  integer testnum, errors;
+
+  // Divider
+  srtradix4 srtradix4(.clk, .DivStart(req), 
+                .XExpE(aExp), .YExpE(bExp), .DivExp,
+                .XSgnE(asign), .YSgnE(bsign), .DivSgn,
+                .XFrac(afrac), .YFrac(bfrac), 
+                .SrcA('0), .SrcB('0),
+                .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone,
+                .Quot, .Rem());
+
+  // Clock generator: 17 time units high, 17 low
+
+
+    initial
+    forever
+      begin
+        clk = 1; #17;
+        clk = 0; #17;
+      end
+
+
+  // Read test vectors from disk
+  initial
+    begin
+      testnum = 0; 
+      errors = 0;
+      $readmemh ("testvectors", Tests);
+      Vec = Tests[testnum];
+      a = Vec[`mema];
+      {asign, aExp, afrac} = a;
+      b = Vec[`memb];
+      {bsign, bExp, bfrac} = b;
+      nextr = Vec[`memr];
+      r = Quot[`DIVLEN-1:`DIVLEN - 52]; // top 52 bits of the quotient
+      req <= 1;
+    end
+  
+  // Apply directed test vectors read from file.
+
+  always @(posedge clk)
+    begin
+      r = Quot[`DIVLEN-1:`DIVLEN - 52]; // top 52 bits of the quotient
+      if (DivDone) begin
+        req <= 1; // request the next division
+        diffp = correctr[51:0] - r;
+        diffn = r - correctr[51:0];
+        if ((DivSgn !== correctr[63]) | (DivExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
+          begin
+            errors = errors+1;
+            $display("result was %h_%h, should be %h %h %h\n", DivExp, r, correctr, diffn, diffp);
+            $display("failed\n");
+            $stop;
+          end
+        if (afrac === 52'hxxxxxxxxxxxxx) // an all-X fraction is used as the end-of-vectors marker
+          begin
+            $display("%d Tests completed successfully", testnum);
+            $stop;
+          end
+	end
+      if (req) 
+	begin
+	  req <= 0;
+	  correctr = nextr; // expected result for the vector already loaded
+	  testnum = testnum+1;
+	  Vec = Tests[testnum];
+	  $display("a = %h  b = %h",a,b); // prints the operands before they are replaced by the next vector
+    a = Vec[`mema];
+    {asign, aExp, afrac} = a;
+    b = Vec[`memb];
+    {bsign, bExp, bfrac} = b;
+    nextr = Vec[`memr];
+	end
+    end
+ 
+endmodule
+ 
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@ -0,0 +1,187 @@
+`define DIVLEN 64
+
+/////////////
+// counter //
+/////////////
+// module counter(input  logic clk, 
+//                input  logic req, 
+//                output logic done);
+ 
+//    logic    [7:0]  count;
+
+//   // This block of control logic sequences the divider
+//   // through its iterations.  You may modify it if you
+//   // build a divider which completes in fewer iterations.
+//   // You are not responsible for the (trivial) circuit
+//   // design of the block.
+
+//   always @(posedge clk)
+//     begin
+//       if      (count == `DIVLEN + 2) done <= #1 1;
+//       else if (done | req) done <= #1 0;	
+//       if (req) count <= #1 0;
+//       else     count <= #1 count+1;
+//     end
+// endmodule
+
+///////////
+// clock //
+///////////
+// Declares a single output port, clk, and contains no logic
+// that drives it.
+module clock(clk);
+  // Port direction and data type declared together
+  output logic clk;
+endmodule
+
+//////////
+// testbench //
+//////////
+// File-driven testbench for the srt divider.  Vectors are read from
+// "testvectors"; each holds operand a, operand b and the expected
+// result, and every result is checked to within 1 ulp.  Int and Sqrt
+// are tied to 0 here, so only the floating-point divide check is
+// exercised; the integer and square-root checks are kept for when
+// those mode signals are changed.
+module testbench;
+  logic              clk;
+  logic              req;    // drives Start; also gates loading of the next vector
+  logic              done;
+  logic              Int;    // integer-mode select, tied to 0 below
+  logic              Sqrt;   // square-root select, tied to 0 below
+  logic [63:0]       a, b;
+  logic [51:0]       afrac, bfrac;
+  logic [10:0]       aExp, bExp;
+  logic              asign, bsign;
+  logic [51:0]       r;      // top 52 bits of Quot
+  logic [63:0]       rInt;   // full-width Quot
+  logic [`DIVLEN-1:0]  Quot;
+ 
+  // Test parameters
+  parameter MEM_SIZE = 40000;
+  parameter MEM_WIDTH = 64+64+64+64;
+ 
+  // INT TEST SIZES
+  // `define memrem  63:0 
+  // `define memr  127:64
+  // `define memb  191:128
+  // `define mema  255:192
+
+  // FLOAT TEST SIZES
+  `define memr  63:0 
+  `define memb  127:64
+  `define mema  191:128
+
+  // Test registers
+  logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
+  logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
+                            // bit field of an array 
+  logic [63:0] correctr, nextr, diffn, diffp;
+  logic [10:0] rExp;
+  logic        rsign;
+  integer testnum, errors;
+
+  assign Int = 1'b0;
+  // Sqrt was previously read by the checker without being declared;
+  // it carries the same constant the divider port was tied to.
+  assign Sqrt = 1'b0;
+
+  // Divider
+  srt srt(.clk, .Start(req), 
+                .Stall(1'b0), .Flush(1'b0), 
+                .XExp(aExp), .YExp(bExp), .rExp,
+                .XSign(asign), .YSign(bsign), .rsign,
+                .SrcXFrac(afrac), .SrcYFrac(bfrac), 
+                .SrcA(a), .SrcB(b), .Fmt(2'b00), 
+                .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, 
+                .Quot, .Rem(), .Flags(), .done);
+
+  // Counter
+  // counter counter(clk, req, done);
+
+
+    // Clock generator: 17 time units high, 16 low
+    initial
+    forever
+      begin
+        clk = 1; #17;
+        clk = 0; #16;
+      end
+
+
+  // Read test vectors from disk
+  initial
+    begin
+      testnum = 0; 
+      errors = 0;
+      $readmemh ("testvectors", Tests);
+      Vec = Tests[testnum];
+      a = Vec[`mema];
+      {asign, aExp, afrac} = a;
+      b = Vec[`memb];
+      {bsign, bExp, bfrac} = b;
+      nextr = Vec[`memr];
+      r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
+      rInt = Quot;
+      req <= #5 1;
+    end
+  
+  // Apply directed test vectors read from file.
+
+  always @(posedge clk) begin
+    r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
+    rInt = Quot;
+    if (done) begin
+      if (~Int & ~Sqrt) begin
+        // Floating-point divide: sign, exponent and fraction are checked
+        req <= #5 1;
+        diffp = correctr[51:0] - r;
+        diffn = r - correctr[51:0];
+        if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
+          begin
+            errors = errors+1;
+            $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
+            $display("failed\n");
+            $stop;
+          end
+        if (afrac === 52'hxxxxxxxxxxxxx) // an all-X fraction is used as the end-of-vectors marker
+          begin
+            $display("%d Tests completed successfully", testnum);
+            $stop;
+          end
+      end else if (~Sqrt) begin
+        // Integer divide: the full 64-bit quotient is checked
+        req <= #5 1;
+        diffp = correctr[63:0] - rInt;
+        diffn = rInt - correctr[63:0];
+        if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
+          begin
+            errors = errors+1;
+            $display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp);
+            $display("failed\n");
+            $stop;
+          end
+        if (afrac === 52'hxxxxxxxxxxxxx)
+        begin
+          $display("%d Tests completed successfully", testnum);
+          $stop;
+        end
+      end else begin 
+        // Square root: only the 52-bit fraction r is checked
+        req <= #5 1;
+        diffp = correctr[51:0] - r;
+        diffn = r - correctr[51:0];
+        if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
+          begin
+            errors = errors + 1;
+            // Print r, the value that was compared (rSqrt was never declared)
+            $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
+            $display("failed\n");
+            $stop;
+          end
+        if (afrac === 52'hxxxxxxxxxxxxx) begin 
+          $display("%d Tests completed successfully", testnum);
+          $stop; end 
+      end
+    end
+    if (req) begin
+      req <= #5 0;
+      correctr = nextr;  // expected result for the vector already loaded
+      testnum = testnum+1;
+      Vec = Tests[testnum];
+      $display("a = %h  b = %h",a,b);  // prints the operands before they are replaced
+      a = Vec[`mema];
+      {asign, aExp, afrac} = a;
+      b = Vec[`memb];
+      {bsign, bExp, bfrac} = b;
+      nextr = Vec[`memr];
+    end
+  end
+endmodule
+ 
--- a/pipelined/srt/testgen.c
+++ b/pipelined/srt/testgen.c
@ -0,0 +1,94 @@
+/* testgen.c */
+
+/* Written 10/31/96 by David Harris
+
+   This program creates test vectors for mantissa component
+   of an IEEE floating point divider. 
+   */
+
+/* #includes */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Constants */
+
+#define ENTRIES  17
+#define RANDOM_VECS 500
+
+/* Prototypes */
+
+void output(FILE *fptr, double a, double b, double r);
+void printhex(FILE *fptr, double x);
+double random_input(void);
+
+/* Main */
+
+/* Writes the "testvectors" file: ENTRIES*ENTRIES directed vectors built
+   from every ordered pair of values in list[], followed by RANDOM_VECS
+   random vectors.  Each vector is a, b and r = a/b.
+   Returns 0 on success, 1 if the file cannot be opened or flushed. */
+int main(void)
+{
+  FILE *fptr;
+  double a, b, r;
+  double list[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
+			  1.75, 1.875, 1.99999,
+			  1.1, 1.2, 1.01, 1.001, 1.0001,
+			  1/1.1, 1/1.5, 1/1.25, 1/1.125};
+  int i, j;
+
+  if ((fptr = fopen("testvectors","w")) == NULL) {
+    fprintf(stderr, "Couldn't write testvectors file\n");
+    return 1;
+  }
+
+  /* Directed vectors: the outer loop selects the divisor */
+  for (i=0; i<ENTRIES; i++) {
+    b = list[i];
+    for (j=0; j<ENTRIES; j++) {
+      a = list[j];
+      r = a/b;
+      output(fptr, a, b, r);
+    }
+  }
+  
+  /* Random vectors */
+  for (i = 0; i< RANDOM_VECS; i++) {
+    a = random_input();
+    b = random_input();
+    r = a/b;
+    output(fptr, a, b, r);
+  }
+
+  /* fclose flushes buffered output, so a failure here means the
+     file may be incomplete */
+  if (fclose(fptr) != 0) {
+    fprintf(stderr, "Couldn't finish writing testvectors file\n");
+    return 1;
+  }
+  return 0;
+}
+
+/* Functions */
+
+/* Writes one test vector line to fptr: the hex mantissas of a, b
+   and r in that order, joined by underscores and ended by a newline. */
+void output(FILE *fptr, double a, double b, double r)
+{
+  const double field[3] = {a, b, r};
+  int k;
+
+  for (k = 0; k < 3; k++) {
+    if (k > 0)
+      fprintf(fptr, "_");
+    printhex(fptr, field[k]);
+  }
+  fprintf(fptr, "\n");
+}
+
+/* Writes the 52-bit fraction of m to fptr as 13 lowercase hex digits.
+   m is first scaled by a power of two into [1,2); the digits are the
+   fractional part of that value, truncated (not rounded).
+   Zero, negative, NaN and infinite inputs cannot be scaled into [1,2)
+   and are written as an all-zero fraction. */
+void printhex(FILE *fptr, double m)
+{
+  int i, val, exponent;
+
+  /* The original scaling loops never terminated for zero, negative or
+     infinite m, and NaN reached an undefined double-to-int conversion.
+     frexp yields a value in [0.5,1), so doubling it gives [1,2). */
+  if (!(m > 0.0) || isinf(m))
+    m = 0.0;
+  else
+    m = 2.0 * frexp(m, &exponent);
+
+  for (i=0; i<52; i+=4) {
+    m = m - floor(m);       /* drop the digit already printed */
+    m = m * 16;             /* move the next 4 bits above the point */
+    val = (int)(m)%16;
+    fprintf(fptr, "%x", val);
+  }
+}
+
+/* Returns a pseudo-random value in [1.0, 2.0).
+   The scale is taken from RAND_MAX rather than the hard-coded 32767.0,
+   which gave values far above 2 wherever RAND_MAX is larger. */
+double random_input(void)
+{
+  return 1.0 + rand()/((double)RAND_MAX + 1.0);
+}
+