makefile

2025-02-11 06:05:49 +00:00 · 2022-07-07 16:43:03 -07:00 · 2022-07-07 16:43:03 -07:00 · eba518625d
commit eba518625d
parent 7ef87777c1 b67792086c
70 changed files with 11972 additions and 0 deletions
--- a/pipelined/srt/Makefile
+++ b/pipelined/srt/Makefile
@ -0,0 +1,33 @@
 # Builds the SRT test-vector and quotient-selection-table generators.
 # Every generator rule compiles its C source and then immediately runs the
 # resulting program so that its output file is (re)created.
 # inttestgen is not part of `all`; build it explicitly with `make inttestgen`.
 .PHONY: all clean
 all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen
 sqrttestgen: sqrttestgen.c
 	gcc sqrttestgen.c -o sqrttestgen -lm
 	./sqrttestgen
 testgen: testgen.c
 	gcc testgen.c -o testgen -lm
 	./testgen
 exptestgen: exptestgen.c
 	gcc -o exptestgen exptestgen.c -lm
 	./exptestgen
 qslc_r4a2: qslc_r4a2.c
 	gcc qslc_r4a2.c -o qslc_r4a2 -lm
 	./qslc_r4a2 > qslc_r4a2.sv
 qslc_r4a2b: qslc_r4a2b.c
 	gcc qslc_r4a2b.c -o qslc_r4a2b -lm
 	./qslc_r4a2b > qslc_r4a2b.tv
 qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
 	gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
 	./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
 # Libraries are listed after the source, as in every other rule: the linker
 # resolves symbols in command-line order.
 inttestgen: inttestgen.c
 	gcc inttestgen.c -o inttestgen -lm
 	./inttestgen
 clean:
 	rm -f testgen exptestgen inttestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen
--- a/pipelined/srt/exptestgen.c
+++ b/pipelined/srt/exptestgen.c
@ -0,0 +1,127 @@
 /* exptestgen.c */
 /* Written 2/19/2022 by David Harris
   This program creates test vectors for mantissa and exponent components
   of an IEEE floating point divider.
   Builds upon program that creates test vectors for mantissa component only.
   */
 /* #includes */
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 /* Constants */
 #define ENTRIES  17
 #define RANDOM_VECS 500
 // #define BIAS 1023 // Bias is for double precision
 /* Prototypes */
 void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac);
 void printhex(FILE *fptr, double x);
 double random_input(void);
 double random_input_e(void);
 /* Main */
 /*
  * Generates the directed divider test vectors and writes them to the file
  * "testvectors" in the current directory.  Every ordered pair (a, b) taken
  * from the mantissa/exponent tables below produces one line "a_b_r".
  *
  * Returns 0 on success.  Exits with status 1 if the file cannot be opened
  * and returns 1 if it cannot be closed cleanly.
  */
 int main(void)
 {
  FILE *fptr;
  // aExp & bExp are (biased) exponents
  // aFrac & bFrac are mantissas
  // rFrac is the result of the fractional division
  // rExp is the result of the exponent subtraction
  double aFrac, bFrac, rFrac;
  int    aExp,  bExp,  rExp;
  int    aSign, bSign, rSign;
  double mantissa[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
                              1.75, 1.875, 1.99999,
                              1.1, 1.2, 1.01, 1.001, 1.0001,
                              1/1.1, 1/1.5, 1/1.25, 1/1.125};
  int exponent[ENTRIES] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
  int i, j;
  int bias = 1023; // double-precision exponent bias

  if ((fptr = fopen("testvectors","w")) == NULL) {
    fprintf(stderr, "Couldn't write testvectors file\n");
    exit(1);
  }

  // Table entry k always pairs mantissa[k] with exponent[k]; the sign bit
  // alternates with the table index.
  for (i=0; i<ENTRIES; i++) {
    bFrac = mantissa[i];
    bExp = exponent[i] + bias;
    bSign = i%2;
    for (j=0; j<ENTRIES; j++) {
      aFrac = mantissa[j];
      aExp = exponent[j] + bias;
      aSign = j%2;
      rFrac = aFrac/bFrac;
      // NOTE(review): rFrac can be below 1 when aFrac < bFrac; printhex
      // rescales the printed mantissa but rExp is not decremented here.
      // Confirm this is what the testbench expects.
      rExp = aExp - bExp + bias;
      rSign = (i+j)%2;   // same parity as aSign ^ bSign
      output(fptr, aSign, aExp, aFrac, bSign, bExp, bFrac, rSign, rExp, rFrac);
    }
  }

  // TODO: add RANDOM_VECS random vectors built from random_input() and
  // random_input_e().

  if (fclose(fptr) != 0) {
    fprintf(stderr, "Couldn't finish writing testvectors file\n");
    return 1;
  }
  return 0;
 }
 /* Functions */
 /*
  * Writes one test vector line "a_b_r\n" to fptr.  Each of the three operands
  * is printed as three hex digits holding the sign (bit 11) and exponent,
  * followed by the mantissa digits produced by printhex.
  */
 void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac)
 {
  const int    signBit[3]  = { aSign, bSign, rSign };
  const int    expField[3] = { aExp,  bExp,  rExp  };
  const double fracPart[3] = { aFrac, bFrac, rFrac };
  const char  *trailer[3]  = { "_",   "_",   "\n"  };
  int k;

  for (k = 0; k < 3; k++) {
    int signAndExp = expField[k] | (signBit[k] << 11);
    fprintf(fptr, "%03x", signAndExp);
    printhex(fptr, fracPart[k]);
    fputs(trailer[k], fptr);
  }
 }
 /*
  * printhex
  *
  * Writes the 52 fraction bits of m's significand to fptr as 13 lowercase hex
  * digits.  m is first scaled by powers of two into [1, 2]; the integer bit
  * is then dropped and not printed.  The sign of m is ignored.
  *
  * Zero, NaN and infinity cannot be scaled into [1, 2]; they are printed as
  * thirteen '0' digits rather than looping forever in the scaling step.
  */
 void printhex(FILE *fptr, double m)
 {
  const int len = 52;   /* fraction bits to print, four per hex digit */
  int i, val;

  if (m < 0) m = -m;
  if (!(m > 0) || !isfinite(m)) {
    for (i = 0; i < len; i += 4)
      fprintf(fptr, "0");
    return;
  }

  while (m<1) m *= 2;
  while (m>2) m /= 2;

  m -= (int)m;          /* discard the integer bit */
  for (i = 0; i < len; i += 4) {
    m *= 16;            /* shift the next four fraction bits up */
    val = (int)m;
    fprintf(fptr, "%x", val);
    m -= val;
  }
 }
 /*
  * Returns a pseudo-random mantissa in the closed interval [1, 2].
  * The draw is scaled by RAND_MAX so the range holds on every platform,
  * not only where RAND_MAX happens to be 32767.
  */
 double random_input(void)
 {
  return 1.0 + rand() / (double)RAND_MAX;
 }
 /*
  * Returns a pseudo-random whole number between 1 and 300 inclusive,
  * delivered as a double.
  */
 double random_input_e(void)
 {
  const int span = 300;
  int draw = rand() % span;

  return (double)(draw + 1);
 }
--- a/pipelined/srt/inttestgen
+++ b/pipelined/srt/inttestgen
--- a/pipelined/srt/inttestgen.c
+++ b/pipelined/srt/inttestgen.c
@ -0,0 +1,83 @@
 /* inttestgen.c */
 /* Written 10/31/96 by David Harris
   This program creates test vectors (dividend, divisor, quotient and
   remainder) for an integer divider.
   */
 /* #includes */
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 /* Constants */
 #define ENTRIES  10
 #define RANDOM_VECS 500
 /* Prototypes */
 void output(FILE *fptr, long a, long b, long r, long rem);
 void printhex(FILE *fptr, long x);
 double random_input(void);
 /* Main */
 /*
  * Generates integer-division test vectors and writes them to the file
  * "inttestvectors" in the current directory.  Every ordered pair (a, b) from
  * the list below produces one line "a_b_quotient_remainder".
  *
  * Returns 0 on success.  Exits with status 1 if the file cannot be opened
  * and returns 1 if it cannot be closed cleanly.
  */
 int main(void)
 {
  FILE *fptr;
  long a, b, r, rem;
  /* No entry is zero, so the division below never divides by zero. */
  long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
  int i, j;

  if ((fptr = fopen("inttestvectors","w")) == NULL) {
    fprintf(stderr, "Couldn't write inttestvectors file\n");
    exit(1);
  }

  for (i=0; i<ENTRIES; i++) {
    b = list[i];
    for (j=0; j<ENTRIES; j++) {
      a = list[j];
      r = a/b;
      rem = a%b;
      output(fptr, a, b, r, rem);
    }
  }

  /* TODO: add RANDOM_VECS random vectors. */

  if (fclose(fptr) != 0) {
    fprintf(stderr, "Couldn't finish writing inttestvectors file\n");
    return 1;
  }
  return 0;
 }
 /* Functions */
 /*
  * Writes one test vector line "a_b_r_rem\n" to fptr, each field formatted
  * by printhex.
  */
 void output(FILE *fptr, long a, long b, long r, long rem)
 {
  const long field[4] = { a, b, r, rem };
  int k;

  for (k = 0; k < 4; k++) {
    printhex(fptr, field[k]);
    fputs(k < 3 ? "_" : "\n", fptr);
  }
 }
 /*
  * Writes m to fptr as exactly 16 zero-padded lowercase hex digits.
  * The value is converted to unsigned long long so that the argument type
  * matches the %llx conversion regardless of the width of long.
  */
 void printhex(FILE *fptr, long m)
 {
    fprintf(fptr, "%016llx", (unsigned long long)m);
 }
 /*
  * Returns a pseudo-random value in the closed interval [1, 2].
  * The draw is scaled by RAND_MAX so the range holds on every platform,
  * not only where RAND_MAX happens to be 32767.
  */
 double random_input(void)
 {
  return 1.0 + rand() / (double)RAND_MAX;
 }
--- a/pipelined/srt/lint-srt
+++ b/pipelined/srt/lint-srt
@ -0,0 +1,2 @@
 # Lint the SRT divider sources with Verilator; --lint-only checks the design without building a simulation.
 # First command: top module "srt" in srt.sv, with the rv64gc and shared config directories on the include path.
 verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
 # Second command: top module "srtradix4", built from srt-radix4.sv and qsel4.sv with the same include paths.
 verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
--- a/pipelined/srt/qslc_r4a2.c
+++ b/pipelined/srt/qslc_r4a2.c
@ -0,0 +1,198 @@
 /*
  Program:      qslc_r4a2.c
  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
  User:         James E. Stine
 */
 #include <stdio.h>
 #include <math.h>
 #define DIVISOR_SIZE 3
 #define CARRY_SIZE 7
 #define SUM_SIZE 7
 #define TOT_SIZE 7
 void disp_binary(double, int, int);
 /*
  * Current row of the quotient selection table.
  *   divisor : unsigned table index, DIVISOR_SIZE bits wide
  *   tot     : residual estimate, TOT_SIZE bits wide.  It is compared against
  *             negative thresholds, so it is declared explicitly signed; the
  *             signedness of a plain int bit-field is implementation-defined.
  */
 struct bits {
  unsigned int divisor : DIVISOR_SIZE;
  signed int tot : TOT_SIZE;
 } pla;
 /*
    Function:      disp_binary
    Description:   Writes x to stdout as a string of '0'/'1' characters,
    most significant digit first.  No radix point is printed.
    A negative x is offset by 2^bits_to_left before conversion (two's
    complement form).  A magnitude below 2^-bits_to_right prints as zeros.
    Argument List: double x            The value to be converted
    int bits_to_left    Number of bits left of radix point
    int bits_to_right   Number of bits right of radix point
    Return value:  none
 */
 void disp_binary(double x, int bits_to_left, int bits_to_right) {
  int exponent;
  double weight;

  /* Too small to set even the least significant digit: emit all zeros. */
  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
    for (exponent = bits_to_left - 1; exponent >= -bits_to_right; exponent--)
      putchar('0');
    return;
  }

  if (x < 0.0)
    x += pow(2.0, (double) bits_to_left);

  /* Peel off one binary digit per step, from weight 2^(bits_to_left-1) down. */
  for (exponent = bits_to_left - 1; exponent >= -bits_to_right; exponent--) {
    weight = pow(2.0, (double) exponent);
    if (x < weight) {
      putchar('0');
      continue;
    }
    putchar('1');
    x -= weight;
  }
 }
 /*
  * Prints the radix-4 (a=2) quotient selection table to stdout as the body of
  * a case statement: one "10'b<divisor>_<residual>: q = 4'b<digit>;" line per
  * (divisor, residual) combination, walking the global pla through every
  * value of its two bit-fields.
  *
  * Digit encoding:
  *   1000 = +2, 0100 = +1, 0000 = 0, 0010 = -1, 0001 = -2
  */
 int main() {
  /* Row = divisor index.  Column k holds the smallest residual that still
     selects digit k (+2, +1, 0, -1); anything below the last column is -2. */
  static const int lower_bound[8][4] = {
    { 12, 4, -4, -13 },
    { 14, 4, -6, -15 },
    { 15, 4, -6, -16 },
    { 16, 4, -6, -18 },
    { 18, 6, -8, -20 },
    { 20, 6, -8, -20 },
    { 20, 8, -8, -22 },
    { 24, 8, -8, -24 }
  };
  static const char *const digit_code[5] = {
    "1000", "0100", "0000", "0010", "0001"
  };
  const int divisor_rows  = 1 << DIVISOR_SIZE;
  const int residual_cols = 1 << TOT_SIZE;
  int row, col, k;

  pla.divisor = 0;
  pla.tot = 0;
  printf("\tcase({D[5:3],Wmsbs})\n");
  for (row = 0; row < divisor_rows; row++) {
    for (col = 0; col < residual_cols; col++) {
      printf("\t\t10'b");
      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
      printf("_");
      disp_binary((double) pla.tot, TOT_SIZE, 0);
      printf(": q = 4'b");
      if (pla.divisor < 8) {
        /* First threshold the residual reaches decides the digit. */
        k = 0;
        while (k < 4 && pla.tot < lower_bound[pla.divisor][k])
          k++;
        printf("%s", digit_code[k]);
      } else {
        printf ("XXX");
      }
      printf(";\n");
      (pla.tot)++;      /* bit-field wraps after its largest value */
    }
    (pla.divisor)++;
  }
  printf("\tendcase\n");
 }
--- a/pipelined/srt/qslc_r4a2b
+++ b/pipelined/srt/qslc_r4a2b
--- a/pipelined/srt/qslc_r4a2b.c
+++ b/pipelined/srt/qslc_r4a2b.c
@ -0,0 +1,190 @@
 /*
  Program:      qslc_r4a2b.c
  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
  User:         James E. Stine
 */
 #include <stdio.h>
 #include <math.h>
 #define DIVISOR_SIZE 3
 #define CARRY_SIZE 7
 #define SUM_SIZE 7
 #define TOT_SIZE 7
 void disp_binary(double, int, int);
 /*
  * Current row of the quotient selection table.
  *   divisor : unsigned table index, DIVISOR_SIZE bits wide
  *   tot     : residual estimate, TOT_SIZE bits wide.  It is compared against
  *             negative thresholds, so it is declared explicitly signed; the
  *             signedness of a plain int bit-field is implementation-defined.
  */
 struct bits {
  unsigned int divisor : DIVISOR_SIZE;
  signed int tot : TOT_SIZE;
 } pla;
 /*
    Function:      disp_binary
    Description:   Writes x to stdout as a string of '0'/'1' characters,
    most significant digit first.  No radix point is printed.
    A negative x is offset by 2^bits_to_left before conversion (two's
    complement form).  A magnitude below 2^-bits_to_right prints as zeros.
    Argument List: double x            The value to be converted
    int bits_to_left    Number of bits left of radix point
    int bits_to_right   Number of bits right of radix point
    Return value:  none
 */
 void disp_binary(double x, int bits_to_left, int bits_to_right) {
  int exponent;
  double weight;

  /* Too small to set even the least significant digit: emit all zeros. */
  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
    for (exponent = bits_to_left - 1; exponent >= -bits_to_right; exponent--)
      putchar('0');
    return;
  }

  if (x < 0.0)
    x += pow(2.0, (double) bits_to_left);

  /* Peel off one binary digit per step, from weight 2^(bits_to_left-1) down. */
  for (exponent = bits_to_left - 1; exponent >= -bits_to_right; exponent--) {
    weight = pow(2.0, (double) exponent);
    if (x < weight) {
      putchar('0');
      continue;
    }
    putchar('1');
    x -= weight;
  }
 }
 /*
  * Prints the radix-4 (a=2) quotient selection table to stdout, one hex digit
  * per line, walking the global pla through every value of its two
  * bit-fields (residual varies fastest).
  *
  * Digit encoding (hex of the 4-bit one-hot code):
  *   8 = +2, 4 = +1, 0 = 0, 2 = -1, 1 = -2
  */
 int main() {
  /* Row = divisor index.  Column k holds the smallest residual that still
     selects digit k (+2, +1, 0, -1); anything below the last column is -2. */
  static const int lower_bound[8][4] = {
    { 12, 4, -4, -13 },
    { 14, 4, -6, -15 },
    { 15, 4, -6, -16 },
    { 16, 4, -6, -18 },
    { 18, 6, -8, -20 },
    { 20, 6, -8, -20 },
    { 20, 8, -8, -22 },
    { 24, 8, -8, -24 }
  };
  static const char *const digit_code[5] = { "8", "4", "0", "2", "1" };
  const int divisor_rows  = 1 << DIVISOR_SIZE;
  const int residual_cols = 1 << TOT_SIZE;
  int row, col, k;

  pla.divisor = 0;
  pla.tot = 0;
  for (row = 0; row < divisor_rows; row++) {
    for (col = 0; col < residual_cols; col++) {
      if (pla.divisor < 8) {
        /* First threshold the residual reaches decides the digit. */
        k = 0;
        while (k < 4 && pla.tot < lower_bound[pla.divisor][k])
          k++;
        printf("%s", digit_code[k]);
      } else {
        printf ("X");
      }
      printf("\n");
      (pla.tot)++;      /* bit-field wraps after its largest value */
    }
    (pla.divisor)++;
  }
 }
--- a/pipelined/srt/qslc_r4a2b.tv
+++ b/pipelined/srt/qslc_r4a2b.tv
--- a/pipelined/srt/qslc_sqrt_r4a2
+++ b/pipelined/srt/qslc_sqrt_r4a2
--- a/pipelined/srt/qslc_sqrt_r4a2.c
+++ b/pipelined/srt/qslc_sqrt_r4a2.c
@ -0,0 +1,198 @@
 /*
  Program:      qslc_sqrt_r4a2.c
  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
  User:         James E. Stine
 */
 #include <stdio.h>
 #include <math.h>
 #define DIVISOR_SIZE 3
 #define CARRY_SIZE 7
 #define SUM_SIZE 7
 #define TOT_SIZE 7
 void disp_binary(double, int, int);
 /*
  * Current row of the quotient selection table.
  *   divisor : unsigned table index, DIVISOR_SIZE bits wide
  *   tot     : residual estimate, TOT_SIZE bits wide.  It is compared against
  *             negative thresholds, so it is declared explicitly signed; the
  *             signedness of a plain int bit-field is implementation-defined.
  */
 struct bits {
  unsigned int divisor : DIVISOR_SIZE;
  signed int tot : TOT_SIZE;
 } pla;
 /*
    Function:      disp_binary
    Description:   Writes x to stdout as a string of '0'/'1' characters,
    most significant digit first.  No radix point is printed.
    A negative x is offset by 2^bits_to_left before conversion (two's
    complement form).  A magnitude below 2^-bits_to_right prints as zeros.
    Argument List: double x            The value to be converted
    int bits_to_left    Number of bits left of radix point
    int bits_to_right   Number of bits right of radix point
    Return value:  none
 */
 void disp_binary(double x, int bits_to_left, int bits_to_right) {
  int exponent;
  double weight;

  /* Too small to set even the least significant digit: emit all zeros. */
  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
    for (exponent = bits_to_left - 1; exponent >= -bits_to_right; exponent--)
      putchar('0');
    return;
  }

  if (x < 0.0)
    x += pow(2.0, (double) bits_to_left);

  /* Peel off one binary digit per step, from weight 2^(bits_to_left-1) down. */
  for (exponent = bits_to_left - 1; exponent >= -bits_to_right; exponent--) {
    weight = pow(2.0, (double) exponent);
    if (x < weight) {
      putchar('0');
      continue;
    }
    putchar('1');
    x -= weight;
  }
 }
 /*
  * Prints the square-root variant of the radix-4 (a=2) quotient selection
  * table to stdout as the body of a case statement, one
  * "11'b<divisor>_<residual>: q = 4'b<digit>;" line per (divisor, residual)
  * combination, walking the global pla through every value of its bit-fields.
  *
  * Digit encoding:
  *   1000 = +2, 0100 = +1, 0000 = 0, 0010 = -1, 0001 = -2
  *
  * NOTE(review): the label says 11'b but only DIVISOR_SIZE + TOT_SIZE binary
  * digits are printed after it; confirm whether the label or TOT_SIZE is the
  * intended width.
  */
 int main() {
  /* Row = divisor index.  Column k holds the smallest residual that still
     selects digit k (+2, +1, 0, -1); anything below the last column is -2. */
  static const int lower_bound[8][4] = {
    { 24,  8,  -8, -26 },
    { 28,  8, -10, -28 },
    { 32,  8, -12, -32 },
    { 32,  8, -12, -34 },
    { 36, 12, -12, -36 },
    { 40, 12, -16, -40 },
    { 40, 16, -16, -44 },
    { 44, 16, -16, -46 }
  };
  static const char *const digit_code[5] = {
    "1000", "0100", "0000", "0010", "0001"
  };
  const int divisor_rows  = 1 << DIVISOR_SIZE;
  const int residual_cols = 1 << TOT_SIZE;
  int row, col, k;

  pla.divisor = 0;
  pla.tot = 0;
  printf("\tcase({D[5:3],Wmsbs})\n");
  for (row = 0; row < divisor_rows; row++) {
    for (col = 0; col < residual_cols; col++) {
      printf("\t\t11'b");
      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
      printf("_");
      disp_binary((double) pla.tot, TOT_SIZE, 0);
      printf(": q = 4'b");
      if (pla.divisor < 8) {
        /* First threshold the residual reaches decides the digit. */
        k = 0;
        while (k < 4 && pla.tot < lower_bound[pla.divisor][k])
          k++;
        printf("%s", digit_code[k]);
      } else {
        printf ("XXX");
      }
      printf(";\n");
      (pla.tot)++;      /* bit-field wraps after its largest value */
    }
    (pla.divisor)++;
  }
  printf("\tendcase\n");
 }
--- a/pipelined/srt/qslc_sqrt_r4a2.sv
+++ b/pipelined/srt/qslc_sqrt_r4a2.sv
--- a/pipelined/srt/sim-srt
+++ b/pipelined/srt/sim-srt
@ -0,0 +1,2 @@
 # Start vsim and have it execute the srt.do script.
 vsim -do "do srt.do"
--- a/pipelined/srt/sim-srt-batch
+++ b/pipelined/srt/sim-srt-batch
@ -0,0 +1 @@
 # Start vsim with -c (no GUI, per the usage notes in srt.do) and execute the srt.do script.
 vsim -c -do "do srt.do"
--- a/pipelined/srt/sqrttestgen
+++ b/pipelined/srt/sqrttestgen
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@ -0,0 +1,100 @@
 /* sqrttestgen.c */
 /* Written 19 October 2021 David_Harris@hmc.edu
   This program creates test vectors for mantissa component
   of an IEEE floating point square root. 
   */
 /* #includes */
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 /* Constants */
 #define ENTRIES  17
 #define RANDOM_VECS 500
 /* Prototypes */
 void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac);
 void printhex(FILE *fptr, double x);
 double random_input(void);
 /* Main */
 /*
  * Generates square-root test vectors and writes them to the file
  * "sqrttestvectors" in the current directory.  Entry i pairs mans[i] with
  * exps[i] and produces one line "a_r".
  *
  * Returns 0 on success.  Exits with status 1 if the file cannot be opened
  * and returns 1 if it cannot be closed cleanly.
  */
 int main(void)
 {
  FILE *fptr;
  double aFrac, rFrac;
  int    aExp,  rExp;
  /* Every mantissa lies in [1, 2), so the exponent written next to it
     describes the same value that the square root is computed from
     (printhex would otherwise rescale a mantissa below 1). */
  double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
                          1.75, 1.875, 1.99999,
                          1.1, 1.2, 1.01, 1.001, 1.0001,
                          2/1.1, 2/1.5, 2/1.25, 2/1.125};
  double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                          11, 12, 13, 14, 15, 16};
  int i;
  int bias = 1023; /* double-precision exponent bias */

  if ((fptr = fopen("sqrttestvectors","w")) == NULL) {
    fprintf(stderr, "Couldn't write sqrttestvectors file\n");
    exit(1);
  }

  for (i=0; i<ENTRIES; i++) {
    aFrac = mans[i];
    aExp  = exps[i] + bias;
    /* rFrac holds the full square root; printhex later reduces it to a
       mantissa, and rExp carries its biased base-2 exponent. */
    rFrac = sqrt(aFrac * pow(2, aExp - bias));
    rExp  = (int) (log(rFrac)/log(2) + bias);
    output(fptr, aExp, aFrac, rExp, rFrac);
  }

  /* TODO: add RANDOM_VECS random vectors built from random_input(). */

  if (fclose(fptr) != 0) {
    fprintf(stderr, "Couldn't finish writing sqrttestvectors file\n");
    return 1;
  }
  return 0;
 }
 /* Functions */
 /*
  * Writes one test vector line "a_r\n" to fptr.  Each operand is printed as a
  * three-hex-digit exponent followed by the mantissa digits from printhex.
  */
 void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
 {
  const int    expField[2] = { aExp,  rExp  };
  const double fracPart[2] = { aFrac, rFrac };
  int k;

  for (k = 0; k < 2; k++) {
    fprintf(fptr, "%03x", expField[k]);
    printhex(fptr, fracPart[k]);
    fputc(k == 0 ? '_' : '\n', fptr);
  }
 }
 /*
  * printhex
  *
  * Writes the 52 fraction bits of m's significand to fptr as 13 lowercase hex
  * digits.  m is first scaled by powers of two into [1, 2]; the integer bit
  * is then dropped and not printed.  The sign of m is ignored.
  *
  * Zero, NaN and infinity cannot be scaled into [1, 2]; they are printed as
  * thirteen '0' digits rather than looping forever in the scaling step.
  */
 void printhex(FILE *fptr, double m)
 {
  int i, val;

  if (m < 0) m = -m;
  if (!(m > 0) || !isfinite(m)) {
    for (i = 0; i < 52; i += 4)
      fprintf(fptr, "0");
    return;
  }

  while (m<1) m *= 2;
  while (m>2) m /= 2;

  m -= (int)m;          /* discard the integer bit */
  for (i = 0; i < 52; i += 4) {
    m *= 16;            /* shift the next four fraction bits up */
    val = (int)m;
    fprintf(fptr, "%x", val);
    m -= val;
  }
 }
 /*
  * Returns a pseudo-random mantissa in the closed interval [1, 2].
  * The draw is scaled by RAND_MAX so the range holds on every platform,
  * not only where RAND_MAX happens to be 32767.
  */
 double random_input(void)
 {
  return 1.0 + rand() / (double)RAND_MAX;
 }
--- a/pipelined/srt/srt-waves.do
+++ b/pipelined/srt/srt-waves.do
@ -0,0 +1,5 @@
 # Wave window setup: adds the signals under /testbench, /testbench/srt and
 # three instances inside it (otfc2, preproc, divcounter).
 add wave -noupdate /testbench/*
 add wave -noupdate /testbench/srt/*
 add wave -noupdate /testbench/srt/otfc2/*
 add wave -noupdate /testbench/srt/preproc/*
 add wave -noupdate /testbench/srt/divcounter/*
--- a/pipelined/srt/srt.do
+++ b/pipelined/srt/srt.do
@ -0,0 +1,28 @@
 # srt.do
 #
 # David_Harris@hmc.edu 19 October 2021
 # Use this srt.do file to compile and simulate the SRT divider testbench.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do srt.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do srt.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 onbreak {resume}
 # create library
 if [file exists work] {
    vdel -all
 }
 vlib work
 vlog +incdir+../config/rv64gc +incdir+../config/shared srt.sv testbench.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
 vopt +acc work.testbench -o workopt 
 vsim workopt
 # add the testbench and divider signals to the wave window
 do ./srt-waves.do
 # Run the Simulation
 run -all
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@ -0,0 +1,334 @@
 ///////////////////////////////////////////
 // srt.sv
 //
 // Written: David_Harris@hmc.edu 13 January 2022
 // Modified: cturek@hmc.edu June 2022
 //
 // Purpose: Combined Divide and Square Root Floating Point and Integer Unit
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // MIT LICENSE
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 // software and associated documentation files (the "Software"), to deal in the Software 
 // without restriction, including without limitation the rights to use, copy, modify, merge, 
 // publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
 // to whom the Software is furnished to do so, subject to the following conditions:
 //
 //   The above copyright notice and this permission notice shall be included in all copies or 
 //   substantial portions of the Software.
 //
 //   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 //   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
 //   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 //   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
 //   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 //   OR OTHER DEALINGS IN THE SOFTWARE.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"
 `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
 `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
 // srt: top level of the radix-2 SRT unit.
 // The operands are preprocessed into X and Dpreproc, the partial remainder
 // is kept in carry-save form (WS, WC), one quotient digit in {+1, 0, -1} is
 // selected per cycle, and the digits are accumulated into Quot by the
 // on-the-fly converter.
 // NOTE(review): Rem and Flags are declared as outputs but are not driven in
 // this module; Stall, Flush, intExp and intSign are not used here.
 module srt (
  input  logic clk,
  input  logic Start, 
  input  logic Stall, // *** multiple pipe stages
  input  logic Flush, // *** multiple pipe stages
  // Floating Point Inputs
  // later add exponents, signs, special cases
  input  logic       XSign, YSign,
  input  logic [`NE-1:0] XExp, YExp,
  input  logic [`NF-1:0] SrcXFrac, SrcYFrac,
  input  logic [`XLEN-1:0] SrcA, SrcB,
  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
  input  logic       W64, // 32-bit ints on XLEN=64
  input  logic       Signed, // Interpret integers as signed 2's complement
  input  logic       Int, // Choose integer inputs
  input  logic       Sqrt, // perform square root, not divide
  output logic       rsign, done,
  output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
  output logic [`NE-1:0] rExp,
  output logic [3:0] Flags
 );
  logic           qp, qz, qm; // quotient is +1, 0, or -1
  logic [`NE-1:0] calcExp;
  logic           calcSign;
  logic [`DIVLEN+3:0]  X, Dpreproc;
  logic [`DIVLEN+3:0]  WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
  logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
  logic           intSign;

  srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);

  // Top Muxes and Registers
  // When start is asserted, the inputs are loaded into the divider.
  // Otherwise, the divisor is retained and the partial remainder
  // (shifted left by one bit) is fed back for the next iteration.
  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+2:0], 1'b0}, X, Start, WSN);
  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+2:0], 1'b0}, {(`DIVLEN+4){1'b0}}, Start, WCN);
  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
  flopen #(`DIVLEN+4) dflop(clk, Start, Dpreproc, D);

  // Quotient Selection logic
  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, qm)
  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);

  // Result exponent, sign and iteration count are captured when Start is high.
  flopen #(`NE) expflop(clk, Start, calcExp, rExp);
  flopen #(1) signflop(clk, Start, calcSign, rsign);
  // Register width follows the declared width of dur instead of a literal 7.
  flopen #($clog2(`XLEN+1)) durflop(clk, Start, calcDur, dur);
  counter divcounter(clk, Start, dur, done);

  // Divisor Selection logic
  // qp selects the inverted divisor, qz selects zero, otherwise D is used.
  assign Db = ~D;
  mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);

  // Partial Product Generation
  // qp is also the carry-in, completing the two's complement of D.
  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
  otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);

  // Merge conflict resolved in favour of the square-root aware exponent
  // calculation, which takes Sqrt as an additional input.
  expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
  signcalc signcalc(.XSign, .YSign, .calcSign);
 endmodule
 ////////////////
 // Submodules //
 ////////////////
 ///////////////////
 // Preprocessing //
 ///////////////////
 // srtpreproc: prepares the dividend/radicand X and divisor D for the
 // iteration datapath, and computes the integer result exponent, sign and
 // iteration count.
 module srtpreproc (
  input  logic [`XLEN-1:0] SrcA, SrcB,
  input  logic [`NF-1:0] SrcXFrac, SrcYFrac,
  input  logic [`NE-1:0] XExp,
  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
  input  logic       W64, // 32-bit ints on XLEN=64
  input  logic       Signed, // Interpret integers as signed 2's complement
  input  logic       Int, // Choose integer inputs
  input  logic       Sqrt, // perform square root, not divide
  output logic [`DIVLEN+3:0] X, D,
  output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent
  output logic       intSign // Quotient integer sign
 );
  // Width of the count signals; replaces the literal 7 / bit index 6 that
  // only matched this width for one value of XLEN.
  localparam CNTBITS = $clog2(`XLEN+1);

  logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
  logic  [`XLEN-1:0] PosA, PosB;
  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX;

  // Magnitudes of the integer operands: negate when signed and MSB is set.
  assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
  assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;

  // presumably lzc counts leading zeros - confirm against lzc.sv
  lzc #(`XLEN) lzcA (PosA, zeroCntA);
  lzc #(`XLEN) lzcB (PosB, zeroCntB);

  // Integers are padded on the right with EXTRAINTBITS zeros, then shifted
  // left by (count + 1).
  assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
  assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
  assign PreprocA = ExtraA << (zeroCntA + 1);
  assign PreprocB = ExtraB << (zeroCntB + 1);

  // Fractions are padded on the right with EXTRAFRACBITS zeros.
  assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
  assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};

  assign DivX = Int ? PreprocA : PreprocX;
  // NOTE(review): SqrtX is declared `DIVLEN bits wide but is assigned a
  // (`NF+4)-bit concatenation and then drives the (`DIVLEN+4)-bit X without
  // a prefix; confirm the intended alignment.
  assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac};
  assign X = Sqrt ? SqrtX : {4'b0001, DivX};
  assign D = {4'b0001, Int ? PreprocB : PreprocY};

  assign intExp = zeroCntB - zeroCntA + 1;
  assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
  // Integer duration is intExp, forced to zero when its top bit is set;
  // floating point always uses `DIVLEN + 2.
  assign dur = Int ? (intExp & {CNTBITS{~intExp[CNTBITS-1]}}) : (`DIVLEN + 2);
 endmodule
 /////////////////////////////////
 // Quotient Selection, Radix 2 //
 /////////////////////////////////
 // qsel2: selects the next radix-2 quotient digit from the top four bits of
 // the carry-save partial remainder (ps = sum word, pc = carry word).
 // Exactly one of qp (+1), qz (0), qm (-1) is asserted.
 module qsel2 ( // *** eventually just change to 4 bits
  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
  output logic         qp, qz, qm
 );
  logic [`DIVLEN+3:`DIVLEN]  p, g;
  logic          magnitude, sign, cout;
  // The quotient selection logic is presented for simplicity, not
  // for efficiency.  You can probably optimize your logic to
  // select the proper divisor with less delay.
  // Quotient equations from EE371 lecture notes 13-20
  // Bitwise propagate and generate of the two input words.
  assign p = ps ^ pc;
  assign g = ps & pc;
  // magnitude is low only when the three lower propagate bits are all 1.
  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
  // Carry out of the three lower bit positions into the top bit.
  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
  // Top bit of ps + pc.
  assign #1 sign = p[`DIVLEN+3] ^ cout;
  // Earlier form of the same equations with fixed bit indices 52..55,
  // kept for reference.
 /*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
 			  (ps[52]^pc[52]));
  assign #1 sign = (ps[55]^pc[55])^
      (ps[54] & pc[54] | ((ps[54]^pc[54]) &
 			    (ps[53]&pc[53] | ((ps[53]^pc[53]) &
 						(ps[52]&pc[52]))))); */
  // Produce quotient = +1, 0, or -1
  assign #1 qp = magnitude & ~sign;
  assign #1 qz = ~magnitude;
  assign #1 qm = magnitude & sign;
 endmodule
 ///////////////////////////////////
 // On-The-Fly Converter, Radix 2 //
 ///////////////////////////////////
 // otfc2: radix-2 on-the-fly converter.  Builds the N-bit result r from the
 // stream of signed quotient digits (qp = +1, qz = 0, qm = -1) without a
 // final carry-propagate addition.
 module otfc2 #(parameter N=64) (
  input  logic         clk,
  input  logic         Start,
  input  logic         qp, qz, qm,
  output logic [N-1:0] r
 );
  //  The on-the-fly converter transfers the quotient 
  //  bits to the quotient as they come. 
  //
  //  This code follows the pseudocode presented in the 
  //  floating point chapter of the book. Right now, 
  //  it is written for Radix-2 division.
  //
  //  QM is Q-1. It allows us to write negative bits 
  //  without using a costly CPA. 
  logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
  //  QR and QMR are the shifted versions of Q and QM:
  //  they drop the most significant bit of Q and QM so that
  //  a new digit can be appended on the right.
  logic [N+1:0] QR, QMR;

  // All three widths use the module parameter N (the declared width of Q and
  // QM) rather than the `DIVLEN macro, so the module is correct for any N.
  // Start drives the reset input of Qreg and loads QM with all ones.
  flopr #(N+3) Qreg(clk, Start, QNext, Q);
  mux2 #(N+3) QMmux(QMNext, {(N+3){1'b1}}, Start, QMMux);
  flop #(N+3) QMreg(clk, QMMux, QM);

  always_comb begin
    QR  = Q[N+1:0];
    QMR = QM[N+1:0];     // Shift Q and QM
    if (qp) begin
      QNext  = {QR,  1'b1};
      QMNext = {QR,  1'b0};
    end else if (qz) begin
      QNext  = {QR,  1'b0};
      QMNext = {QMR, 1'b1};
    end else begin        // If qp and qz are not true, then qm is
      QNext  = {QMR, 1'b1};
      QMNext = {QMR, 1'b0};
    end 
  end

  // Pick the N-bit window of Q according to its top bit.
  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
 endmodule
 /////////////
 // counter //
 /////////////
 // counter: iteration counter for the divider.
 //   req  restarts the count at zero
 //   dur  count value at which done is raised
 //   done set on the clock edge where count == dur, cleared on the next
 //        edge where done or req is high
 module counter(input  logic clk, 
               input  logic req, 
               input  logic [$clog2(`XLEN+1)-1:0] dur,
               output logic done);
    logic    [$clog2(`XLEN+1)-1:0]  count;
  // This block of control logic sequences the divider
  // through its iterations.  You may modify it if you
  // build a divider which completes in fewer iterations.
  // You are not responsible for the (trivial) circuit
  // design of the block.
  always @(posedge clk)
    begin
      // The count == dur test has priority over clearing done.
      if      (count == dur) done <= #1 1;
      else if (done | req) done <= #1 0;	
      // count keeps incrementing (and wraps) until the next req.
      if (req) count <= #1 0;
      else     count <= #1 count+1;
    end
 endmodule
 //////////
 // mux3 //
 //////////
 // mux3onehot: three-input multiplexer for (N+4)-bit words.
 // sel0 has priority over sel1; in2 is chosen when neither is set.
 // sel2 is not examined, and mutual exclusion of the selects is not checked.
 module mux3onehot #(parameter N=65) (
  input  logic [N+3:0] in0, in1, in2,
  input  logic         sel0, sel1, sel2,
  output logic [N+3:0] out
 );
  logic [N+3:0] lowerChoice;

  // Second-level choice, used only when sel0 is low.
  assign lowerChoice = sel1 ? in1 : in2;
  assign #1 out = sel0 ? in0 : lowerChoice;
 endmodule
 /////////
 // csa //
 /////////
 // csa: N-bit 3:2 carry-save adder.
 // Adds in1, in2, in3 and cin, producing the result in redundant form:
 // out1 is the bitwise sum word and out2 is the carry word.  The carry word
 // is shifted left by one position, which frees its least significant bit
 // to hold cin (needed when a negated divisor is added).
 module csa #(parameter N=69) (
  input  logic [N-1:0] in1, in2, in3, 
  input  logic         cin, 
  output logic [N-1:0] out1, out2
 );
  logic [N-2:0] a, b, c, carryBits;

  // Only the lower N-1 bit positions produce a carry that is kept.
  assign a = in1[N-2:0];
  assign b = in2[N-2:0];
  assign c = in3[N-2:0];
  assign carryBits = a & (b | c) | (b & c);

  assign #1 out1 = in1 ^ in2 ^ in3;
  assign #1 out2 = {carryBits, cin};
 endmodule
 //////////////
 // expcalc  //
 //////////////
 // expcalc: computes the biased result exponent.
 //   divide:      XExp - YExp + BIAS
 //   square root: (XExp - BIAS) shifted right by one bit, plus BIAS
 // The merge conflict is resolved in favour of the variant with the Sqrt
 // input, so the module serves both operations.
 module expcalc(
  input  logic [`NE-1:0] XExp, YExp,
  input  logic           Sqrt,
  output logic [`NE-1:0] calcExp
 );
  logic        [`NE-1:0] SExp, DExp, SXExp;

  // Unbiased exponent of X.
  assign SXExp = XExp - (`NE)'(`BIAS);
  // NOTE(review): the halving fills the top bit with 0 (logical shift), so an
  // XExp below BIAS is not sign-extended; confirm whether an arithmetic
  // shift is intended.
  assign SExp  = {1'b0, SXExp[`NE-1:1]} + (`NE)'(`BIAS);
  assign DExp  = XExp - YExp + (`NE)'(`BIAS);
  assign calcExp = Sqrt ? SExp : DExp;
 endmodule
 //////////////
 // signcalc //
 //////////////
 // signcalc: result sign is set exactly when the operand signs differ.
 module signcalc(
  input logic  XSign, YSign,
  output logic calcSign
 );
  assign calcSign = (XSign != YSign);
 endmodule
--- a/pipelined/srt/srt_stanford.sv
+++ b/pipelined/srt/srt_stanford.sv
@ -0,0 +1,355 @@
 ///////////////////////////////////////////////////////
 // srt.sv                                            //
 //                                                   //
 // Written 10/31/96 by David Harris harrisd@leland   //
 // Updated 10/19/21 David_Harris@hmc.edu             //
 //                                                   //
 // This file models a simple Radix 2 SRT divider.    //
 //                                                   //
 ///////////////////////////////////////////////////////
 // This Verilog file models a radix 2 SRT divider which
 // produces one quotient digit per cycle.  The divider
 // keeps the partial remainder in carry-save form.
 /////////
 // srt //
 /////////
 // Datapath of the radix-2 SRT unit.  One quotient digit is selected per
 // clock; the digits are accumulated in rp (+1 digits) and rm (-1 digits).
 module srt(input  logic clk, 
            input  logic req, 
            input  logic sqrt,  // 1 to compute sqrt(a), 0 to compute a/b
                                // NOTE(review): sqrt is not referenced anywhere in this module body
            input  logic [51:0] a, b, 
            output logic [54:0] rp, rm);
  // A simple Radix 2 SRT divider/sqrt
  // Internal signals
  logic   [55:0] ps, pc;     // partial remainder in carry-save form
  logic   [55:0] d;          // divisor
  logic   [55:0] psa, pca;   // partial remainder result of csa
  logic   [55:0] psn, pcn;   // partial remainder for next cycle
  logic   [55:0] dn;         // divisor for next cycle
  logic   [55:0] dsel;       // selected divisor multiple
  logic          qp, qz, qm; // quotient is +1, 0, or -1
  logic   [55:0] d_b;        // inverse of divisor
  // Top Muxes and Registers
  // When req is asserted, the inputs are loaded into the divider
  // (a and b are each prefixed with 4'b0001, the carry word is cleared).
  // Otherwise, the divisor is retained and the csa result, shifted left
  // by one bit, is fed back for the next iteration.
  mux2 psmux({psa[54:0], 1'b0}, {4'b0001, a}, req, psn);
  flop psflop(clk, psn, ps);
  mux2 pcmux({pca[54:0], 1'b0}, 56'b0, req, pcn);
  flop pcflop(clk, pcn, pc);
  mux2 dmux(d, {4'b0001, b}, req, dn);
  flop dflop(clk, dn, d);
  // Quotient Selection logic
  // Given the top four bits of the partial remainder, select a quotient
  // digit of +1, 0, or -1 (qp, qz, qm)
  // Accumulate quotient digits in a shift register
  qsel qsel(ps[55:52], pc[55:52], qp, qz, qm);
  qacc qacc(clk, req, qp, qz, qm, rp, rm);
  // Divisor Selection logic
  // qp selects the inverted divisor, qz selects zero, otherwise the
  // divisor itself is selected.
  inv dinv(d, d_b);
  mux3 divisorsel(d_b, 56'b0, d, qp, qz, qm, dsel);
  // Partial Product Generation
  // qp is also the csa carry-in, which together with the inverted
  // divisor forms the two's complement (-d) when the digit is +1.
  csa csa(ps, pc, dsel, qp, psa, pca);
 endmodule
 //////////
 // mux2 //
 //////////
 // 56-bit 2:1 mux with one time unit of delay: out follows in1 when sel
 // is 1 and in0 when sel is 0.
 module mux2(input  logic [55:0] in0, in1, 
            input  logic        sel, 
            output logic [55:0] out);
   assign #1 out = sel ? in1 : in0;
 endmodule
 //////////
 // flop //
 //////////
 module flop(input  logic        clk,
             input  logic [55:0] in,
             output logic [55:0] out);
  // 56-bit rising-edge register.  The stored value is updated one time
  // unit after the clock edge and reaches the output one further time
  // unit later.
  logic [55:0] q;
  always @(posedge clk)
    q <= #1 in;
  assign #1 out = q;
 endmodule
 //////////
 // qsel //
 //////////
 // Quotient digit selection from the four most significant bits of the
 // carry-save partial remainder (sum word ps, carry word pc).
 // Exactly one of qp (+1), qz (0), qm (-1) is asserted.
 module qsel(input  logic [55:52] ps, pc, 
            output logic         qp, qz, qm);
  logic [55:52]  p, g;   // per-bit propagate (xor) and generate (and)
  logic          magnitude, sign, cout;
  // The quotient selection logic is presented for simplicity, not
  // for efficiency.  You can probably optimize your logic to
  // select the proper divisor with less delay.
  // Quotient equations from EE371 lecture notes 13-20
  assign p = ps ^ pc;
  assign g = ps & pc;
  // magnitude is 0 only when bits 54:52 all propagate
  assign #1 magnitude = ~(&p[54:52]);
  // carry out of bits 54:52 into bit 55
  assign #1 cout = g[54] | (p[54] & (g[53] | p[53] & g[52]));
  // bit 55 of the sum ps + pc over these four bits
  assign #1 sign = p[55] ^ cout;
  // The commented-out block below is the same magnitude/sign logic
  // written directly in terms of ps and pc.
 /*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
 			  (ps[52]^pc[52]));
  assign #1 sign = (ps[55]^pc[55])^
      (ps[54] & pc[54] | ((ps[54]^pc[54]) &
 			    (ps[53]&pc[53] | ((ps[53]^pc[53]) &
 						(ps[52]&pc[52]))))); */
  // Produce quotient = +1, 0, or -1
  assign #1 qp = magnitude & ~sign;
  assign #1 qz = ~magnitude;
  assign #1 qm = magnitude & sign;
 endmodule
 //////////
 // qacc //
 //////////
 // Quotient accumulator.
 // rp collects the +1 quotient digits and rm the -1 digits, one bit per
 // clock, shifting in at the least significant end.  Both registers are
 // cleared while req is asserted.
 module qacc(clk, req, qp, qz, qm, rp, rm);
  input 	clk;
  input         req;
  input 	qp;
  input 	qz;    // not used here; kept so the port list is unchanged
  input 	qm;
  output [54:0] rp;
  output [54:0] rm;
  logic    [54:0] rp, rm; // quotient bit is +/- 1;
  always @(posedge clk)
    begin
      if (req) 
 	begin
 	  rp <= #1 0;
 	  rm <= #1 0;
 	end
      else 
 	begin
 	  // Shift left by one and insert the new digit.  Only bits 53:0
 	  // are kept so the concatenation is exactly 55 bits wide; the
 	  // old bit 54 is shifted out.
 	  rp <= #1 {rp[53:0], qp};
 	  rm <= #1 {rm[53:0], qm};
 	end
    end
 endmodule
 /////////
 // inv //
 /////////
 // 56-bit bitwise inverter with one time unit of delay.
 module inv(input  logic [55:0] in, 
           output logic [55:0] out);
  assign #1 out = ~in;
 endmodule
 //////////
 // mux3 //
 //////////
 module mux3(input  logic [55:0] in0,
             input  logic [55:0] in1,
             input  logic [55:0] in2,
             input  logic        sel0,
             input  logic        sel1,
             input  logic        sel2,
             output logic [55:0] out);
  // 56-bit 3:1 mux with one select line per input.
  // The selects are examined in priority order (sel0, then sel1) and
  // in2 is the fall-through choice, so sel2 itself is never read.
  // Mutual exclusion of the selects is not checked.
  assign #1 out = sel0 ? in0 : (sel1 ? in1 : in2);
 endmodule
 /////////
 // csa //
 /////////
 module csa(input  logic [55:0] in1,
            input  logic [55:0] in2,
            input  logic [55:0] in3,
            input  logic        cin,
            output logic [55:0] out1,
            output logic [55:0] out2);
  // 56-bit carry-save adder: in1 + in2 + in3 + cin is returned as the
  // redundant pair out1 (sum word) / out2 (carry word).
  // The carry word is the bitwise majority of the inputs moved up one
  // position, which leaves its least significant bit free; cin goes
  // there.  cin is what makes adding a negated divisor work.
  logic [54:0] maj;  // per-bit majority of in1/in2/in3, bits 54..0
  assign maj = (in1[54:0] & in2[54:0]) |
               (in1[54:0] & in3[54:0]) |
               (in2[54:0] & in3[54:0]);
  assign #1 out1 = in1 ^ in2 ^ in3;
  assign #1 out2 = {maj, cin};
 endmodule
 //////////////
 // finaladd //
 //////////////
 // Converts the accumulated quotient digits into a 52-bit mantissa:
 // r is taken from rp - rm, one bit position higher when bit 54 of the
 // difference is set.
 module finaladd(rp, rm, r);
  input  [54:0] rp;
  input  [54:0] rm;
  output [51:0] r;
  logic   [54:0] diff;
  // this magic block performs the final addition for you
  // to convert the positive and negative quotient digits
  // into a normalized mantissa.  It returns the 52 bit
  // mantissa after shifting to guarantee a leading 1.
  // You can assume this block operates in one cycle
  // and do not need to budget it in your area and power
  // calculations.
  // Since no rounding is performed, the result may be too 
  // small by one unit in the least significant place (ulp).
  // The checker ignores such an error.
  assign #1 diff = rp - rm;
  // diff[54] set: the 52 bits below it are diff[53:2];
  // otherwise the field one position lower, diff[52:1], is used.
  assign #1 r = diff[54] ? diff[53:2] : diff[52:1];
 endmodule
 /////////////
 // counter //
 /////////////
 // Iteration counter.  req resets count to 0; otherwise count increments
 // every clock.  done is set on the clock edge at which count equals 54
 // and is cleared on the next edge at which done or req is high.
 module counter(input  logic clk, 
                input  logic req, 
                output logic done);
    logic    [5:0]  count;
  // This block of control logic sequences the divider
  // through its iterations.  You may modify it if you
  // build a divider which completes in fewer iterations.
  // You are not responsible for the (trivial) circuit
  // design of the block.
  always @(posedge clk)
    begin
      // count == 54 takes priority over the clear condition
      if      (count == 54) done <= #1 1;
      else if (done | req) done <= #1 0;	
      if (req) count <= #1 0;
      else     count <= #1 count+1;
    end
 endmodule
 ///////////
 // clock //
 ///////////
 // Placeholder clock module: it declares the clk output but contains no
 // logic that drives it.  The testbench in this file generates its own
 // clock in an initial block.
 module clock(clk);
  output clk;
  // Internal clk signal
  logic clk;
 endmodule
 //////////
 // testbench //
 //////////
 // Self-checking testbench for the radix-2 SRT divider.
 // Vectors of the form {a, b, expected r} are read from the file
 // "testvectors"; each is applied with a req pulse and the result is
 // compared when the counter asserts done.
 module testbench;
  logic         clk;
  logic        req;
  logic         done;
  logic [51:0] a;
  logic [51:0] b;
  logic  [51:0] r;
  logic [54:0] rp, rm;   // positive and negative quotient digits
  // Test parameters
  parameter MEM_SIZE = 40000;
  parameter MEM_WIDTH = 52+52+52;
  // Bit fields of one test vector
  `define memr  51:0
  `define memb  103:52
  `define mema  155:104
  // Test registers
  logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
  logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
                            // bit field of an array 
  logic    [51:0] correctr, nextr;
  integer testnum, errors;
  // Divider
  // Ports are connected by name.  srt has a sqrt input between req and
  // a, so a positional list without it would shift every later
  // connection by one port.  This testbench only checks division, so
  // sqrt is tied low.
  srt  srt(.clk(clk), .req(req), .sqrt(1'b0), .a(a), .b(b), .rp(rp), .rm(rm));
  // Final adder converts quotient digits to 2's complement & normalizes
  finaladd finaladd(rp, rm, r);
  // Counter
  counter counter(clk, req, done);
  // Free-running clock: high for 17 time units, low for 16
    initial
    forever
      begin
        clk = 1; #17;
        clk = 0; #16;
      end
  // Read test vectors from disk
  initial
    begin
      testnum = 0; 
      errors = 0;
      $readmemh ("testvectors", Tests);
      Vec = Tests[testnum];
      a = Vec[`mema];
      b = Vec[`memb];
      nextr = Vec[`memr];
      req <= #5 1;
    end
  // Apply directed test vectors read from file.
  always @(posedge clk)
    begin
      if (done) 
 	begin
 	  req <= #5 1;
 	  $display("result was %h, should be %h\n", r, correctr);
 	  if ((correctr - r) > 1) // check if accurate to 1 ulp
 	    begin
 	      errors = errors+1;
 	      $display("failed\n");
 	      $stop;
 	    end
 	  // An all-x operand means the entry just fetched was never
 	  // loaded from the file, i.e. the vectors are exhausted.
 	  if (a === 52'hxxxxxxxxxxxxx)
 	    begin
 	      $display("Tests completed successfully");
 	      $stop;
 	    end
 	end
      if (req) 
 	begin
 	  req <= #5 0;
 	  // The operands now on a/b are being started; remember their
 	  // expected result and prefetch the next vector.
 	  correctr = nextr;
 	  testnum = testnum+1;
 	  Vec = Tests[testnum];
 	  $display("a = %h  b = %h",a,b);
 	  a = Vec[`mema];
 	  b = Vec[`memb];
 	  nextr = Vec[`memr];
 	end
    end
 endmodule
--- a/pipelined/srt/stine/Makefile
+++ b/pipelined/srt/stine/Makefile
@ -0,0 +1,27 @@
 # Builds the radix-4 (srt4div) and radix-2 (srt2div) division programs.
 # Compile flags live in CFLAGS and libraries in LIBS, so overriding
 # CFLAGS on the command line cannot drop the math library from the link.
 CC      = gcc
 CFLAGS  = -g
 LIBS    = -lm
 OBJS4   = disp.o srt4div.o
 OBJS2   = disp.o srt2div.o
 # all and clean name actions, not files
 .PHONY:		all clean
 all:		srt4div srt2div
 disp.o:		disp.h disp.c
 		$(CC) $(CFLAGS) -c -o disp.o disp.c
 srt4div.o:	srt4div.c
 		$(CC) $(CFLAGS) -c -o srt4div.o srt4div.c
 srt2div.o:	srt2div.c
 		$(CC) $(CFLAGS) -c -o srt2div.o srt2div.c
 srt4div:  	$(OBJS4)
 		$(CC) $(CFLAGS) -O3 -o srt4div $(OBJS4) $(LIBS)
 srt2div:  	$(OBJS2)
 		$(CC) $(CFLAGS) -O3 -o srt2div $(OBJS2) $(LIBS)
 # Remove objects, editor backups, core dumps and the built programs
 clean:
 	rm -f *.o *~
 	rm -f core srt4div srt2div
--- a/pipelined/srt/stine/README
+++ b/pipelined/srt/stine/README
@ -0,0 +1 @@
 vsim -do iter64.do -c
--- a/pipelined/srt/stine/README.md
+++ b/pipelined/srt/stine/README.md
@ -0,0 +1,22 @@
 This is a novel integer divider using r4 division by recurrence.  The
 reference is:
 J. E. Stine and K. Hill, "An Efficient Implementation of Radix-4
 Integer Division Using Scaling," 2020 IEEE 63rd International Midwest
 Symposium on Circuits and Systems (MWSCAS), Springfield, MA, USA,
 2020, pp. 1092-1095, doi: 10.1109/MWSCAS48704.2020.9184631.
 Although this version does not include scaling, scaling could be added
 if needed.  Moreover, a higher radix or an overlapped radix can easily
 be implemented to expand the size.  Also, the implementations here
 were initially unsigned; the hope is to extend them to signed
 operands, which should be easy.
 There are two types of tests in this directory within each testbench.
 One tests for 32-bits and the other 64-bits:
 int32div.do and int64div.do = test individual vector for debugging
 iter32.do and iter64.do = do not use any waveform generation and just
 output lots of tests
--- a/pipelined/srt/stine/checkme.sh
+++ b/pipelined/srt/stine/checkme.sh
@ -0,0 +1,19 @@
 #!/bin/sh
 # Print every line of the iteration logs whose last two fields are
 # "0 1", "1 0" or "0 0".
 # NOTE(review): presumably these are lines where a pass/fail flag is 0;
 # confirm against the testbench output format.
 # Logs are visited in the order 64, 32, 128 bits, signed before unsigned.
 for width in 64 32 128; do
   for mode in signed unsigned; do
     for tail in "0 1$" "1 0$" "0 0$"; do
       cat "iter${width}_${mode}.out" | grep "$tail"
     done
   done
 done
--- a/pipelined/srt/stine/disp.c
+++ b/pipelined/srt/stine/disp.c
@ -0,0 +1,60 @@
 #include "disp.h"
 /* Round x toward zero on a grid of 2^-bits: negative values are rounded
  * up with ceiling(), all others are rounded down with flr(). */
 double rnd_zero(double x, double bits) {
   return (x < 0) ? ceiling(x, bits) : flr(x, bits);
 }
 double rne(double x, double precision) {
  double scale, x_round;
  scale = pow(2.0, precision);
  x_round = rint(x * scale) / scale;
  return x_round;
 }
 double flr(double x, double precision) {
  double scale, x_round;
  scale = pow(2.0, precision);
  x_round = floor(x * scale) / scale;
  return x_round;
 }
 double ceiling(double x, double precision) {
  double scale, x_round;
  scale = pow(2.0, precision);
  x_round = ceil(x * scale) / scale;
  return x_round;
 }
 void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file) {
  double diff;
  int i;
  if (fabs(x) <  pow(2.0, -bits_to_right)) {
    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
      fprintf(out_file,"0");
    }
    return;
  }
  if (x < 0.0) {
    // fprintf(out_file, "-");
    // x = - x;
    x = pow(2.0, ((double) bits_to_left)) + x;
  }
  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
    diff = pow(2.0, -i);
    if (x < diff) {
      fprintf(out_file, "0");
    }
    else {
      fprintf(out_file, "1");
      x -= diff;
    }
    if (i == 0) {
      fprintf(out_file, ".");
    }
  }
 }
--- a/pipelined/srt/stine/disp.h
+++ b/pipelined/srt/stine/disp.h
@ -0,0 +1,18 @@
 /* Rounding helpers and a binary fixed-point printer (see disp.c). */
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
 #ifndef DISP
 #define DISP
 /* Round x toward zero to a multiple of 2^-bits. */
 double rnd_zero(double x, double bits);
 /* Round x to a multiple of 2^-precision with rint(). */
 double rne(double x, double precision);
 /* Round x down to a multiple of 2^-precision. */
 double flr(double x, double precision);
 /* Round x up to a multiple of 2^-precision. */
 double ceiling(double x, double precision);
 /* Print x to out_file in binary with bits_to_left integer digits and
  * bits_to_right fraction digits; negative values are shown in two's
  * complement. */
 void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file);
 #endif 
--- a/pipelined/srt/stine/idiv-config.vh
+++ b/pipelined/srt/stine/idiv-config.vh
@ -0,0 +1,27 @@
 //////////////////////////////////////////
 // idiv-config.vh
 //
 // Written: james.stine@okstate.edu 9 June 2022
 // Modified: 
 //
 // Purpose: Specify which features are configured
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 // Integer division tests
 // NOTE(review): presumably the number of test iterations (1048576 = 2^20)
 // run by the integer-division testbenches - confirm where IDIV_TESTS is used.
 `define IDIV_TESTS 1048576
--- a/pipelined/srt/stine/intdiv.sv
+++ b/pipelined/srt/stine/intdiv.sv
--- a/pipelined/srt/stine/iter128.do
+++ b/pipelined/srt/stine/iter128.do
@ -0,0 +1,50 @@
 # Copyright 1991-2007 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 # If the simulation hits a break, resume instead of stopping the script
 onbreak {resume}
 # create library (an existing work library is deleted first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files: the divider sources plus test_iter128.sv
 vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128.sv
 # start and run simulation; the top-level module loaded is work.tb
 vsim -voptargs=+acc work.tb
 # NOTE(review): the two lines starting with "--" are not "#" comments;
 # confirm the simulator accepts them as written.
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 -- Run the Simulation
 # simulate for a fixed amount of time, then exit the simulator
 run 999586700ns
 quit
--- a/pipelined/srt/stine/iter128S.do
+++ b/pipelined/srt/stine/iter128S.do
@ -0,0 +1,50 @@
 # Copyright 1991-2007 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 # If the simulation hits a break, resume instead of stopping the script
 onbreak {resume}
 # create library (an existing work library is deleted first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files: the divider sources plus test_iter128S.sv
 vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128S.sv
 # start and run simulation; the top-level module loaded is work.tb
 vsim -voptargs=+acc work.tb
 # NOTE(review): the two lines starting with "--" are not "#" comments;
 # confirm the simulator accepts them as written.
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 -- Run the Simulation
 # simulate for a fixed amount of time, then exit the simulator
 run 999586700ns
 quit
--- a/pipelined/srt/stine/iter32.do
+++ b/pipelined/srt/stine/iter32.do
@ -0,0 +1,50 @@
 # Copyright 1991-2007 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 # If the simulation hits a break, resume instead of stopping the script
 onbreak {resume}
 # create library (an existing work library is deleted first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files: the divider sources plus test_iter32.sv
 vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32.sv
 # start and run simulation; the top-level module loaded is work.tb
 vsim -voptargs=+acc work.tb
 # NOTE(review): the two lines starting with "--" are not "#" comments;
 # confirm the simulator accepts them as written.
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 -- Run the Simulation
 # simulate for a fixed amount of time, then exit the simulator
 run 999586700ns
 quit
--- a/pipelined/srt/stine/iter32S.do
+++ b/pipelined/srt/stine/iter32S.do
@ -0,0 +1,50 @@
 # Copyright 1991-2007 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 # If the simulation hits a break, resume instead of stopping the script
 onbreak {resume}
 # create library (an existing work library is deleted first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files: the divider sources plus test_iter32S.sv
 vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32S.sv
 # start and run simulation; the top-level module loaded is work.tb
 vsim -voptargs=+acc work.tb
 # NOTE(review): the two lines starting with "--" are not "#" comments;
 # confirm the simulator accepts them as written.
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 -- Run the Simulation
 # simulate for a fixed amount of time, then exit the simulator
 run 999586700ns
 quit
--- a/pipelined/srt/stine/iter64.do
+++ b/pipelined/srt/stine/iter64.do
@ -0,0 +1,50 @@
 # Copyright 1991-2007 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 # If the simulation hits a break, resume instead of stopping the script
 onbreak {resume}
 # create library (an existing work library is deleted first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files: the divider sources plus test_iter64.sv
 vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64.sv
 # start and run simulation; the top-level module loaded is work.tb
 vsim -voptargs=+acc work.tb
 # NOTE(review): the two lines starting with "--" are not "#" comments;
 # confirm the simulator accepts them as written.
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 -- Run the Simulation
 # simulate for a fixed amount of time, then exit the simulator
 run 999586700ns
 quit
--- a/pipelined/srt/stine/iter64S.do
+++ b/pipelined/srt/stine/iter64S.do
@ -0,0 +1,50 @@
 # Copyright 1991-2007 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 # If the simulation hits a break, resume instead of stopping the script
 onbreak {resume}
 # create library (an existing work library is deleted first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files: the divider sources plus test_iter64S.sv
 vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64S.sv
 # start and run simulation; the top-level module loaded is work.tb
 vsim -voptargs=+acc work.tb
 # NOTE(review): the two lines starting with "--" are not "#" comments;
 # confirm the simulator accepts them as written.
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 -- Run the Simulation
 # simulate for a fixed amount of time, then exit the simulator
 run 999586700ns
 quit
--- a/pipelined/srt/stine/lod.sv
+++ b/pipelined/srt/stine/lod.sv
@ -0,0 +1,182 @@
 ///////////////////////////////////////////
 // lod.sv
 //
 // Written: James.Stine@okstate.edu 1 February 2021
 // Modified: 
 //
 // Purpose: Integer Divide instructions
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module lod2 (output logic       P,
              output logic       V,
              input  logic [1:0] B);
   // 2-bit leading-one detector.
   // V: some bit of B is 1.
   // P: the first 1, scanning from B[1] down, is B[0]
   //    (0 means it is B[1], or that there is no 1 at all).
   assign V = |B;
   assign P = ~B[1] & B[0];
 endmodule // lod2
 module lod_hier #(parameter WIDTH=8) 
   (input logic [WIDTH-1:0]          B,
    output logic [$clog2(WIDTH)-1:0] ZP,
    output logic 		     ZV);
   // Width dispatcher for the leading-one detectors: instantiates the
   // fixed-width detector that matches WIDTH.  Only 4, 8, 16, 32, 64
   // and 128 are handled; any other WIDTH instantiates nothing and
   // leaves ZP and ZV undriven.
   case (WIDTH)
     128: lod128 lod128 (ZP, ZV, B);
     64:  lod64 lod64 (ZP, ZV, B);
     32:  lod32 lod32 (ZP, ZV, B);
     16:  lod16 lod16 (ZP, ZV, B);
     8:   lod8 lod8 (ZP, ZV, B);
     4:   lod4 lod4 (ZP, ZV, B);
   endcase
 endmodule // lod_hier
 module lod4 (output logic [1:0] ZP,
              output logic       ZV,
              input  logic [3:0] B);
   // 4-bit leading-one detector built from two 2-bit detectors.
   // *_hi describes B[3:2], *_lo describes B[1:0].
   logic pos_lo, pos_hi;
   logic any_lo, any_hi;
   lod2 l1(pos_lo, any_lo, B[1:0]);
   lod2 l2(pos_hi, any_hi, B[3:2]);
   // The upper half wins whenever it contains a 1; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lod4
 module lod8 (output logic [2:0] ZP,
              output logic       ZV,
              input  logic [7:0] B);
   // 8-bit leading-one detector built from two 4-bit detectors.
   // *_hi describes B[7:4], *_lo describes B[3:0].
   logic [1:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lod4 l1(pos_lo, any_lo, B[3:0]);
   lod4 l2(pos_hi, any_hi, B[7:4]);
   // The upper half wins whenever it contains a 1; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lod8
 module lod16 (output logic [3:0]  ZP,
               output logic        ZV,
               input  logic [15:0] B);
   // 16-bit leading-one detector built from two 8-bit detectors.
   // *_hi describes B[15:8], *_lo describes B[7:0].
   logic [2:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lod8 l1(pos_lo, any_lo, B[7:0]);
   lod8 l2(pos_hi, any_hi, B[15:8]);
   // The upper half wins whenever it contains a 1; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lod16
 module lod32 (output logic [4:0]  ZP,
               output logic        ZV,
               input  logic [31:0] B);
   // 32-bit leading-one detector built from two 16-bit detectors.
   // *_hi describes B[31:16], *_lo describes B[15:0].
   logic [3:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lod16 l1(pos_lo, any_lo, B[15:0]);
   lod16 l2(pos_hi, any_hi, B[31:16]);
   // The upper half wins whenever it contains a 1; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lod32
 module lod64 (output logic [5:0]  ZP,
               output logic        ZV,
               input  logic [63:0] B);
   // 64-bit leading-one detector built from two 32-bit detectors.
   // *_hi describes B[63:32], *_lo describes B[31:0].
   logic [4:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lod32 l1(pos_lo, any_lo, B[31:0]);
   lod32 l2(pos_hi, any_hi, B[63:32]);
   // The upper half wins whenever it contains a 1; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lod64
 module lod128 (output logic [6:0]   ZP,
                output logic         ZV,
                input  logic [127:0] B);
   // 128-bit leading-one detector built from two 64-bit detectors.
   // *_hi describes B[127:64], *_lo describes B[63:0].
   logic [5:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lod64 l1(pos_lo, any_lo, B[63:0]);
   lod64 l2(pos_hi, any_hi, B[127:64]);
   // The upper half wins whenever it contains a 1; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lod128
--- a/pipelined/srt/stine/lzd.do
+++ b/pipelined/srt/stine/lzd.do
@ -0,0 +1,55 @@
 # Copyright 1991-2016 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 # If the simulation hits a break, resume instead of stopping the script
 onbreak {resume}
 # create library (an existing work library is deleted first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files: the detector modules and their testbench
 vlog lod.sv lzd_tb.sv
 # start and run simulation; the top-level module loaded is work.stimulus
 vsim -voptargs=+acc work.stimulus
 view wave
 # NOTE(review): the lines starting with "--" are not "#" comments;
 # confirm the simulator accepts them as written.
 -- display input and output signals as hexidecimal values
 # Displays all signals under /stimulus recursively, in hexadecimal
 add wave -hex -r /stimulus/*
 -- Set Wave Output Items 
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 350
 configure wave -valuecolwidth 200
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 -- Run the Simulation 
 # simulate for 800 ns, then exit the simulator
 run 800ns
 quit
--- a/pipelined/srt/stine/lzd.sv
+++ b/pipelined/srt/stine/lzd.sv
@ -0,0 +1,182 @@
 ///////////////////////////////////////////
 // lzd.sv
 //
 // Written: James.Stine@okstate.edu 1 February 2021
 // Modified: 
 //
 // Purpose: Integer Divide instructions
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module lzd2 (output logic       P,
              output logic       V,
              input  logic [1:0] B);
   // 2-bit leading-zero detector.
   // V: some bit of B is 0.
   // P: copy of B[1]; 0 means the first 0, scanning from B[1] down,
   //    is B[1] itself.
   assign V = ~&B;
   assign P = B[1];
 endmodule // lzd2
 // Width dispatcher for the leading-zero detectors: instantiates the
 // fixed-width detector that matches WIDTH.  Only 4, 8, 16, 32, 64 and
 // 128 are handled; any other WIDTH instantiates nothing and leaves ZP
 // and ZV undriven.
 module lzd_hier #(parameter WIDTH=8) 
   (input logic [WIDTH-1:0]          B,
    output logic [$clog2(WIDTH)-1:0] ZP,
    output logic 		     ZV);
   // Each instance is labelled with its module name
   if (WIDTH == 128)
     lzd128 lzd128 (ZP, ZV, B);	      
   else if (WIDTH == 64)
     lzd64 lzd64 (ZP, ZV, B);	   
   else if (WIDTH == 32)
     lzd32 lzd32 (ZP, ZV, B);
   else if (WIDTH == 16)
     lzd16 lzd16 (ZP, ZV, B);
   else if (WIDTH == 8)
     lzd8 lzd8 (ZP, ZV, B);
   else if (WIDTH == 4)
     lzd4 lzd4 (ZP, ZV, B);
 endmodule // lzd_hier
 module lzd4 (output logic [1:0] ZP,
              output logic       ZV,
              input  logic [3:0] B);
   // 4-bit leading-zero detector built from two 2-bit detectors.
   // *_hi describes B[3:2], *_lo describes B[1:0].
   logic pos_lo, pos_hi;
   logic any_lo, any_hi;
   lzd2 l1 (pos_lo, any_lo, B[1:0]);
   lzd2 l2 (pos_hi, any_hi, B[3:2]);
   // The upper half wins whenever it contains a 0; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lzd4
 module lzd8 (output logic [2:0] ZP,
              output logic       ZV,
              input  logic [7:0] B);
   // 8-bit leading-zero detector built from two 4-bit detectors.
   // *_hi describes B[7:4], *_lo describes B[3:0].
   logic [1:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lzd4 l1 (pos_lo, any_lo, B[3:0]);
   lzd4 l2 (pos_hi, any_hi, B[7:4]);
   // The upper half wins whenever it contains a 0; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lzd8
 module lzd16 (output logic [3:0]  ZP,
               output logic        ZV,
               input  logic [15:0] B);
   // 16-bit leading-zero detector built from two 8-bit detectors.
   // *_hi describes B[15:8], *_lo describes B[7:0].
   logic [2:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lzd8 l1 (pos_lo, any_lo, B[7:0]);
   lzd8 l2 (pos_hi, any_hi, B[15:8]);
   // The upper half wins whenever it contains a 0; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lzd16
 module lzd32 (output logic [4:0]  ZP,
               output logic        ZV,
               input  logic [31:0] B);
   // 32-bit leading-zero detector built from two 16-bit detectors.
   // *_hi describes B[31:16], *_lo describes B[15:0].
   logic [3:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lzd16 l1 (pos_lo, any_lo, B[15:0]);
   lzd16 l2 (pos_hi, any_hi, B[31:16]);
   // The upper half wins whenever it contains a 0; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lzd32
 module lzd64 (output logic [5:0]  ZP,
               output logic        ZV,
               input  logic [63:0] B);
   // 64-bit leading-zero detector built from two 32-bit detectors.
   // *_hi describes B[63:32], *_lo describes B[31:0].
   logic [4:0] pos_lo, pos_hi;
   logic       any_lo, any_hi;
   lzd32 l1 (pos_lo, any_lo, B[31:0]);
   lzd32 l2 (pos_hi, any_hi, B[63:32]);
   // The upper half wins whenever it contains a 0; the top bit of ZP
   // records that the lower half had to be used instead.
   assign ZP = {~any_hi, any_hi ? pos_hi : pos_lo};
   assign ZV = any_lo | any_hi;
 endmodule // lzd64
 // 128-bit leading-zero detector built from two 64-bit detectors.
 //   B  : input word
 //   ZP : 7-bit position output; ZP[6] is 1 when the upper half contains
 //        no 0 and the lower half's position is used instead
 //   ZV : some bit of B is 0
 module lzd128 (ZP, ZV, B);
   input logic [127:0]  B;
   logic [5:0] 	       ZPa;
   logic [5:0] 	       ZPb;
   logic 	       ZVa;
   logic 	       ZVb;
   output logic [6:0]  ZP;
   output logic        ZV;
   // Split B into two non-overlapping 64-bit halves.  Each slice must be
   // exactly 64 bits to match the lzd64 port; a 65-bit slice would be
   // truncated to its low 64 bits and examine the wrong bits.
   lzd64 l1 (ZPa, ZVa, B[63:0]);
   lzd64 l2 (ZPb, ZVb, B[127:64]);
   // The upper half wins whenever it contains a 0
   assign ZP[5:0] = ZVb ? ZPb : ZPa;
   assign ZP[6]   = ~ZVb;
   assign ZV = ZVa | ZVb;
 endmodule // lzd128
--- a/pipelined/srt/stine/lzd_tb.sv
+++ b/pipelined/srt/stine/lzd_tb.sv
@ -0,0 +1,59 @@
 //
 // File name : tb
 // Title     : test
 // project   : HW3
 // Library   : test
 // Purpose   : definition of modules for testbench 
 // notes :   
 //
 // Copyright Oklahoma State University
 //
 // Top level stimulus module
 `timescale 1ns/1ps
 // Testbench: drives 256 random 8-bit vectors into the DUT, one per clock
 // period, and logs B, ZP and ZV in binary to the file "lzd.out".
 module stimulus;
   logic [7:0] B;   
   logic [2:0] ZP;   
   logic       ZV;      
   logic       clk;   
   integer     handle3;
   integer     desc3;
   integer     i;   
   // instantiate part to test
   // NOTE(review): lzd_hier is defined outside this view; confirm that its
   // port order really is (B, ZP, ZV) and that #(8) is its width parameter.
   lzd_hier #(8) dut (B, ZP, ZV);
   // Free-running clock: starts high and toggles every 5 time units (10 ns period)
   initial 
     begin	
 	clk = 1'b1;
 	forever #5 clk = ~clk;
     end
   // Open the log file; desc3 is the copy of the descriptor used by $fdisplay below
   initial
     begin
 	handle3 = $fopen("lzd.out");
 	desc3 = handle3;	
     end
   // Stimulus: a new random vector on each rising edge, sampled and logged on
   // the following falling edge, then the simulation is ended.
   initial
     begin
 	for (i=0; i < 256; i=i+1)
 	  begin
 	     // Put vectors before beginning of clk
 	     @(posedge clk)
 	       begin
 		  B = $random;
 	       end
 	     @(negedge clk)
 	       begin
 		  $fdisplay(desc3, "%b || %b %b", B, ZP, ZV);
 	       end
 	  end // for (i=0; i < 256; i=i+1)
 	$finish;// 	
     end // initial begin   
 endmodule // stimulus
--- a/pipelined/srt/stine/mux.sv
+++ b/pipelined/srt/stine/mux.sv
@ -0,0 +1,51 @@
 // mux2: WIDTH-bit 2:1 multiplexer. y follows d1 when s is 1 and d0 when s is 0.
 module mux2 #(parameter WIDTH = 8)
   (input  logic [WIDTH-1:0] d0,
    input  logic [WIDTH-1:0] d1,
    input  logic             s,
    output logic [WIDTH-1:0] y);
   assign y = s ? d1 : d0;
 endmodule // mux2
 // mux3: WIDTH-bit 3:1 multiplexer. s[1] selects d2 regardless of s[0];
 // otherwise s[0] chooses between d1 (1) and d0 (0).
 module mux3 #(parameter WIDTH = 8)
   (input  logic [WIDTH-1:0] d0,
    input  logic [WIDTH-1:0] d1,
    input  logic [WIDTH-1:0] d2,
    input  logic [1:0]       s,
    output logic [WIDTH-1:0] y);
   logic [WIDTH-1:0]         low_pick;
   // First level: pick between d0 and d1 on s[0]
   assign low_pick = s[0] ? d1 : d0;
   // Second level: s[1] overrides with d2
   assign y = s[1] ? d2 : low_pick;
 endmodule // mux3
 // mux4: WIDTH-bit 4:1 multiplexer. s = 0,1,2,3 selects d0,d1,d2,d3 respectively.
 module mux4 #(parameter WIDTH = 8)
   (input  logic [WIDTH-1:0] d0,
    input  logic [WIDTH-1:0] d1,
    input  logic [WIDTH-1:0] d2,
    input  logic [WIDTH-1:0] d3,
    input  logic [1:0]       s,
    output logic [WIDTH-1:0] y);
   logic [WIDTH-1:0]         low_pair, high_pair;
   // First level: s[0] picks within each pair
   assign low_pair  = s[0] ? d1 : d0;
   assign high_pair = s[0] ? d3 : d2;
   // Second level: s[1] picks the pair
   assign y = s[1] ? high_pair : low_pair;
 endmodule // mux4
 // mux21x32: fixed 32-bit 2:1 multiplexer. Z is B when Sel is 1, otherwise A.
 module mux21x32 (Z, A, B, Sel);
   output logic [31:0] Z;
   input  logic [31:0] A, B;
   input  logic        Sel;
   assign Z = Sel ? B : A;
 endmodule // mux21x32
 // mux21x64: fixed 64-bit 2:1 multiplexer. Z is B when Sel is 1, otherwise A.
 module mux21x64 (Z, A, B, Sel);
   output logic [63:0] Z;
   input  logic [63:0] A, B;
   input  logic        Sel;
   assign Z = Sel ? B : A;
 endmodule // mux21x64
--- a/pipelined/srt/stine/notes
+++ b/pipelined/srt/stine/notes
@ -0,0 +1,30 @@
 Dividend x = (0.10101111)_2, divisor d = (0.11000101)_2 (i = 16 * (0.1100)_2 = 12)
 X = 175 (xAF)
 D = 197 (xC5)
 X = 175/256 = 0.68359375
 D = 197/256 = 0.76953125
 Note: Add lg(r) extra iterations due to shifting of computed q
      q_{computed} = q / radix
 ./srt4div 0.68359375 0.76953125 8 10
 r=2
 X = 0.10011111
 D = 0.11000101
 X = 159 (9F)
 D = 197 (C5)
 X = 159/256 = 0.62109375
 D = 197/256 = 0.76953125
 ./srt2div 0.62109375 0.76953125 8 9
--- a/pipelined/srt/stine/otf4.in
+++ b/pipelined/srt/stine/otf4.in
@ -0,0 +1,23 @@
 .i 4
 .o 6
 .ilb quot[3] quot[2] quot[1] quot[0]
 .ob Qin[1] Qin[0] QMin[1] QMin[0] CshiftQ CshiftQM
 0000 001100
 0001 100110
 0010 111010
 0011 ------
 0100 010001
 0101 ------
 0110 ------
 0111 ------
 1000 100101
 1001 ------
 1010 ------
 1011 ------
 1100 ------
 1101 ------
 1110 ------
 1111 ------
 .e
--- a/pipelined/srt/stine/pd_bad.png
+++ b/pipelined/srt/stine/pd_bad.png
--- a/pipelined/srt/stine/pd_cpa.png
+++ b/pipelined/srt/stine/pd_cpa.png
--- a/pipelined/srt/stine/pd_csa.pdf
+++ b/pipelined/srt/stine/pd_csa.pdf
--- a/pipelined/srt/stine/pd_csa.png
+++ b/pipelined/srt/stine/pd_csa.png
--- a/pipelined/srt/stine/qslc_r4a2
+++ b/pipelined/srt/stine/qslc_r4a2
--- a/pipelined/srt/stine/qslc_r4a2.c
+++ b/pipelined/srt/stine/qslc_r4a2.c
@ -0,0 +1,198 @@
 /*
  Program:      qslc_r4a2.c
  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
  User:         James E. Stine
 */
 #include <stdio.h>
 #include <math.h>
 #define DIVISOR_SIZE 3
 #define CARRY_SIZE 7
 #define SUM_SIZE 7
 #define TOT_SIZE 7
 void disp_binary(double, int, int);
 /* Table index for the quotient-selection PLA: one (divisor, residual) pair.
    pla is the single global instance that main() steps through. */
 struct bits {
  /* DIVISOR_SIZE-bit unsigned divisor index; wraps back to 0 past its maximum */
  unsigned int divisor : DIVISOR_SIZE;
  /* TOT_SIZE-bit residual estimate. main() compares it against negative
     thresholds, so it relies on this plain-int bit-field being signed, which
     is implementation-defined in C -- NOTE(review): confirm for the target compiler. */
  int tot : TOT_SIZE;
 } pla;
 /*
   Function:      disp_binary
   Description:   Writes x to stdout as a run of '0'/'1' characters:
                  bits_to_left digits followed by bits_to_right digits, most
                  significant first (the first digit has weight
                  2^(bits_to_left-1)).  No radix point or newline is printed.
                  A value whose magnitude is below 2^-bits_to_right prints as
                  all zeros.  A negative value has 2^bits_to_left added first,
                  so it prints in two's-complement form.
   Argument List: double x            The value to be converted
                  int bits_to_left    Number of bits left of radix point
                  int bits_to_right   Number of bits right of radix point
   Return value:  none
 */
 void disp_binary(double x, int bits_to_left, int bits_to_right) {
  const int first_pos = -bits_to_left + 1;
  int pos;
  double weight;
  /* Too small to register in the requested precision: emit every digit as 0 */
  if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
    pos = first_pos;
    while (pos <= bits_to_right) {
      printf("0");
      pos++;
    }
    return;
  }
  /* Map a negative value onto its two's-complement bit pattern */
  if (x < 0.0)
    x = pow(2.0, (double) bits_to_left) + x;
  /* Peel off one binary weight per digit, from most to least significant */
  pos = first_pos;
  while (pos <= bits_to_right) {
    weight = pow(2.0, (double) -pos);
    if (x < weight) {
      printf("0");
    } else {
      printf("1");
      x -= weight;
    }
    pos++;
  }
 }
 /*
   Prints a SystemVerilog case statement holding the radix-4 (a=2) quotient
   selection table: one entry for every combination of the DIVISOR_SIZE-bit
   divisor index and the TOT_SIZE-bit residual estimate held in pla.

   Quotient digit encoding (4 bits, one-hot by digit):
 	1000 = +2
 	0100 = +1
 	0000 =  0
 	0010 = -1
 	0001 = -2
 */
 int main() {
  /* bound[d] lists, for divisor index d, the smallest residual that still
     selects +2, +1, 0 and -1 (in that order); anything lower selects -2. */
  static const int bound[8][4] = {
    {12, 4, -4, -13},
    {14, 4, -6, -15},
    {15, 4, -6, -16},
    {16, 4, -6, -18},
    {18, 6, -8, -20},
    {20, 6, -8, -20},
    {20, 8, -8, -22},
    {24, 8, -8, -24}
  };
  static const char *const code[5] = {"1000", "0100", "0000", "0010", "0001"};
  const int divisor_count = 1 << DIVISOR_SIZE;
  const int residual_count = 1 << TOT_SIZE;
  int d;
  int t;
  pla.divisor = 0;
  pla.tot = 0;
  printf("\tcase({D[5:3],Wmsbs})\n");
  for (d = 0; d < divisor_count; d++) {
    for (t = 0; t < residual_count; t++) {
      const char *sel = "XXX";
      printf("\t\t10'b");
      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
      printf("_");
      disp_binary((double) pla.tot, TOT_SIZE, 0);
      printf(": q = 4'b");
      if (pla.divisor < 8) {
        const int *row = bound[pla.divisor];
        int k = 0;
        /* Walk down the thresholds until the residual clears one */
        while (k < 4 && pla.tot < row[k])
          k++;
        sel = code[k];
      }
      printf("%s", sel);
      printf(";\n");
      /* The bit-field wraps, so tot is back at 0 after residual_count steps */
      (pla.tot)++;
    }
    (pla.divisor)++;
  }
  printf("\tendcase\n");
  return 0;
 }
--- a/pipelined/srt/stine/run.sh
+++ b/pipelined/srt/stine/run.sh
@ -0,0 +1,8 @@
 #!/bin/sh
 # Run every divider iteration ModelSim script in batch (-c) mode, in order.
 for script in iter32S iter32 iter64 iter64S iter128 iter128S; do
   vsim -do "$script.do" -c
 done
--- a/pipelined/srt/stine/shift.sv
+++ b/pipelined/srt/stine/shift.sv
@ -0,0 +1,73 @@
 ///////////////////////////////////////////
 // shifters.sv
 //
 // Written: James.Stine@okstate.edu 1 February 2021
 // Modified: 
 //
 // Purpose: Integer Divide instructions
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 // shift_right: logical (zero-filling) right shifter built as a chain of
 // $clog2(WIDTH) mux2 stages.
 //   A     : value to shift
 //   Shift : shift amount
 //   Z     : A shifted right by Shift, vacated upper bits filled with 0
 // Stage i shifts by WIDTH/2**(i+1) when Shift[$clog2(WIDTH)-i-1] is set, so
 // the stage distances are WIDTH/2, WIDTH/4, ..., which only line up with the
 // binary weights of Shift when WIDTH is a power of two.
 module shift_right #(parameter WIDTH=8) 
   (input logic [WIDTH-1:0]         A,
    input logic [$clog2(WIDTH)-1:0] Shift,
    output logic [WIDTH-1:0] 	    Z);
   logic [WIDTH-1:0] 		    stage [$clog2(WIDTH):0];
   genvar 			    i;
   assign stage[0] = A;   
   generate
      for (i=0;i<$clog2(WIDTH);i=i+1)
 	begin : genbit
 	   // mux2 ports are (d0, d1, s, y): d0 passes the stage through,
 	   // d1 is the same value shifted right with zeros shifted in.
 	   mux2 #(WIDTH) mux_inst (stage[i], 
 				   {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, 
 				   Shift[$clog2(WIDTH)-i-1], 
 				   stage[i+1]);
 	end
   endgenerate
   assign Z = stage[$clog2(WIDTH)];   
 endmodule // shift_right
 // shift_left: zero-filling left shifter built as a chain of $clog2(WIDTH)
 // mux2 stages.
 //   A     : value to shift
 //   Shift : shift amount
 //   Z     : A shifted left by Shift, vacated lower bits filled with 0
 // Stage i shifts by WIDTH/2**(i+1) when Shift[$clog2(WIDTH)-i-1] is set.
 module shift_left #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0]         A,
    input  logic [$clog2(WIDTH)-1:0] Shift,
    output logic [WIDTH-1:0]         Z);
   localparam STAGES = $clog2(WIDTH);
   logic [WIDTH-1:0] stage [STAGES:0];
   genvar            i;
   assign stage[0] = A;
   generate
      for (i=0; i<STAGES; i=i+1)
        begin : genbit
           // Distance covered by this stage: WIDTH/2, WIDTH/4, ...
           localparam DIST = WIDTH/(2**(i+1));
           logic [WIDTH-1:0] moved;
           // Drop the top DIST bits and append DIST zeros at the bottom
           assign moved = {stage[i][WIDTH-1-DIST:0], {DIST{1'b0}}};
           mux2 #(WIDTH) mux_inst (stage[i], moved, Shift[STAGES-i-1], stage[i+1]);
        end
   endgenerate
   assign Z = stage[STAGES];
 endmodule // shift_left
--- a/pipelined/srt/stine/shift_left.do
+++ b/pipelined/srt/stine/shift_left.do
@ -0,0 +1,55 @@
 # Copyright 1991-2016 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 onbreak {resume}
 # create library
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files
 vlog mux.sv shift.sv shift_left_tb.sv
 # start and run simulation
 vsim -voptargs=+acc work.stimulus
 view wave
 # Display input and output signals as hexadecimal values.
 # (These notes previously began with "--", which Tcl parses as a command
 # name rather than a comment; they are now real "#" comments.)
 # Displays all signals recursively
 add wave -hex -r /stimulus/*
 # Set Wave Output Items
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 # Run the Simulation
 run 800ns
 quit
--- a/pipelined/srt/stine/shift_left_tb.sv
+++ b/pipelined/srt/stine/shift_left_tb.sv
@ -0,0 +1,71 @@
 //
 // File name : tb
 // Title     : test
 // project   : HW3
 // Library   : test
 // Purpose   : definition of modules for testbench 
 // notes :   
 //
 // Copyright Oklahoma State University
 //
 // Top level stimulus module
 `timescale 1ns/1ps
 `define XLEN 32
 // Testbench for shift_left: applies 256 random (A, Shift) pairs, one per
 // clock period, and logs the DUT output next to the result of the built-in
 // << operator in "shift_left.out". The last column is 1 when they agree.
 module stimulus;
   logic [`XLEN-1:0]         A;
   logic [$clog2(`XLEN)-1:0] Shift;
   logic [`XLEN-1:0]         Z;
   logic [`XLEN-1:0]         Z_corr;
   logic                     clk;
   integer                   handle3;
   integer                   desc3;
   integer                   i;
   // Instantiate the part under test at the testbench width. Without the
   // parameter override shift_left keeps its default WIDTH of 8, and its
   // ports no longer match the `XLEN-wide A, Shift and Z signals.
   shift_left #(`XLEN) dut1 (A, Shift, Z);
   // Reference result
   assign Z_corr = (A << Shift);
   // Free-running clock: starts high, toggles every 5 ns
   initial
     begin
        clk = 1'b1;
        forever #5 clk = ~clk;
     end
   // Open the log file used by $fdisplay below
   initial
     begin
        handle3 = $fopen("shift_left.out");
        desc3 = handle3;
     end
   // New random vector on each rising edge, logged on the following falling edge
   initial
     begin
        for (i=0; i < 256; i=i+1)
          begin
             // Put vectors before beginning of clk
             @(posedge clk)
               begin
                  A = $random;
                  Shift = $random;
               end
             @(negedge clk)
               begin
                  $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr));
               end
          end // for (i=0; i < 256; i=i+1)
        $finish;
     end // initial begin
 endmodule // stimulus
--- a/pipelined/srt/stine/shift_right.do
+++ b/pipelined/srt/stine/shift_right.do
@ -0,0 +1,55 @@
 # Copyright 1991-2016 Mentor Graphics Corporation
 # 
 # Modification by Oklahoma State University
 # Use with Testbench 
 # James Stine, 2008
 # Go Cowboys!!!!!!
 #
 # All Rights Reserved.
 #
 # THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION
 # WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION
 # OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
 # Use this run.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do run.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do run.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 onbreak {resume}
 # create library
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files
 vlog mux.sv shift.sv shift_right_tb.sv
 # start and run simulation
 vsim -voptargs=+acc work.stimulus
 view wave
 # Display input and output signals as hexadecimal values.
 # (These notes previously began with "--", which Tcl parses as a command
 # name rather than a comment; they are now real "#" comments.)
 # Displays all signals recursively
 add wave -hex -r /stimulus/*
 # Set Wave Output Items
 TreeUpdate [SetDefaultTree]
 WaveRestoreZoom {0 ps} {75 ns}
 configure wave -namecolwidth 150
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
 configure wave -snapdistance 10
 configure wave -datasetprefix 0
 configure wave -rowmargin 4
 configure wave -childrowmargin 2
 # Run the Simulation
 run 800ns
 quit
--- a/pipelined/srt/stine/shift_right_tb.sv
+++ b/pipelined/srt/stine/shift_right_tb.sv
@ -0,0 +1,64 @@
 //
 // File name : tb
 // Title     : test
 // project   : HW3
 // Library   : test
 // Purpose   : definition of modules for testbench 
 // notes :   
 //
 // Copyright Oklahoma State University
 //
 // Top level stimulus module
 `timescale 1ns/1ps
 `define XLEN 32
 // Testbench for shift_right: applies random (A, Shift) pairs, one per clock
 // period, and logs the DUT output next to the result of the built-in >>
 // operator in "shift_right.out". The last column is 1 when they agree.
 module stimulus;
   logic [`XLEN-1:0]         A;
   logic [$clog2(`XLEN)-1:0] Shift;
   logic [`XLEN-1:0]         Z;
   logic [`XLEN-1:0]         Z_corr;
   logic                     clk;
   integer                   handle3;
   integer                   desc3;
   integer                   i;
   // Instantiate the part under test at the testbench width. Without the
   // parameter override shift_right keeps its default WIDTH of 8, and its
   // ports no longer match the `XLEN-wide A, Shift and Z signals.
   shift_right #(`XLEN) dut1 (A, Shift, Z);
   // Reference result
   assign Z_corr = (A >> Shift);
   // Free-running clock: starts high, toggles every 5 ns
   initial
     begin
        clk = 1'b1;
        forever #5 clk = ~clk;
     end
   // Open the log file and end the run at 250 ns.
   // NOTE(review): this $finish fires long before the 128-vector loop below
   // completes (10 ns per vector) -- confirm the short run is intended.
   initial
     begin
        handle3 = $fopen("shift_right.out");
        desc3 = handle3;
        #250 $finish;
     end
   // New random vector on each rising edge, logged on the following falling edge
   initial
     begin
        for (i=0; i < 128; i=i+1)
          begin
             // Put vectors before beginning of clk
             @(posedge clk)
               begin
                  A = $random;
                  Shift = $random;
               end
             @(negedge clk)
               begin
                  $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr));
               end
          end // for (i=0; i < 128; i=i+1)
     end // initial begin
 endmodule // stimulus
--- a/pipelined/srt/stine/shifter.sv
+++ b/pipelined/srt/stine/shifter.sv
@ -0,0 +1,18 @@
 // shifter_right: 64-bit right shift of a by shamt (0-63), result on y.
 // NOTE(review): a and y are declared signed, but >> is a logical shift that
 // fills with zeros; use >>> if sign extension is wanted -- confirm intent.
 module shifter_right(input logic signed [63:0] a,
 		     input logic [ 5:0] 	shamt,
 		     output logic signed [63:0] y);
   // A bare "y = ..." is not legal at module level; it must be a continuous assignment
   assign y = a >> shamt;
 endmodule // shifter_right
 // shifter_left: 64-bit left shift of a by shamt (0-63), zero-filled, result on y.
 module shifter_left(input logic signed [63:0] a,
 		    input logic [ 5:0] 	       shamt,
 		    output logic signed [63:0] y);
   // A bare "y = ..." is not legal at module level; it must be a continuous assignment
   assign y = a << shamt;
 endmodule // shifter_left
--- a/pipelined/srt/stine/srt2div
+++ b/pipelined/srt/stine/srt2div
--- a/pipelined/srt/stine/srt2div.c
+++ b/pipelined/srt/stine/srt2div.c
@ -0,0 +1,114 @@
 #include "disp.h"
 // Quotient-digit selection for radix-2 division by recurrence
 // using a CPA - See 5.109 EL
 // Returns +1 when prem >= 0.5, 0 when -0.5 <= prem < 0.5, and -1 otherwise.
 // D is accepted for interface symmetry but does not affect the result.
 int qst (double D, double prem) {
  // For Debugging
  printf("rw --> %lg\n", prem);  
  if (prem >= 0.5)
    return 1;
  if (prem >= -0.5)
    return 0;
  return -1;
 }
 /*
 This routine performs a radix-2 SRT division 
 algorithm.  The user inputs the numerator, the denominator, 
 the number of iterations and the display precision (in bits).
 It assumes that 0.5 <= D < 1.

 Usage: srt2div numerator denominator num_iterations prec
 Exits with status 1 when an argument is missing or cannot be parsed.
 Every iteration prints the shifted residual, q*D, the selected digit and the
 running quotient; the final quotient is multiplied by the radix before it is
 compared with N/D because the first step pre-scales the dividend by 1/radix.
 */
 int main(int argc, char* argv[]) {
   double P, N, D, Q, RQ, RD, scale;
   int q;
   int num_iter, i;
   int prec;
   int radix = 2;
   if (argc < 5) {
      fprintf(stderr,
 	      "Usage: %s numerator denominator num_iterations prec\n", 
 	      argv[0]);
      exit(1);
   }
   // Refuse arguments that do not parse; otherwise N, D, num_iter or prec
   // would be read uninitialised below.
   if (sscanf(argv[1],"%lg", &N) != 1 ||
       sscanf(argv[2],"%lg", &D) != 1 ||
       sscanf(argv[3],"%d", &num_iter) != 1 ||
       sscanf(argv[4],"%d", &prec) != 1) {
      fprintf(stderr,
 	      "Usage: %s numerator denominator num_iterations prec\n", 
 	      argv[0]);
      exit(1);
   }
   // Round to precision
   N = rne(N, prec);
   D = rne(D, prec);
   printf("N = ");
   disp_bin(N, 3, prec, stdout);
   printf("\n");
   printf("D = ");
   disp_bin(D, 3, prec, stdout);
   printf("\n");
   Q = 0;
   // Pre-scale the dividend by 1/radix
   P = N * pow(2.0, -log2(radix));
   printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n", 
 	  N, D, N/D, num_iter); 
   for (scale = 1, i = 0; i < num_iter; i++) {
     // Weight of the digit produced in this iteration
     scale = scale * pow(2.0, -log2(radix));
     q = qst(flr(2*D, 1), 2*P);
     printf("2*W[n] = ");
     disp_bin(radix*P, 3, prec, stdout);
     printf("\n");
     printf("q*D = ");      
     disp_bin(q*D, 3, prec, stdout);
     printf("\n");
     // NOTE(review): P has not been updated yet at this point, so the value
     // printed under the "W[n+1]" label is still the residual from before
     // the recurrence step -- confirm whether that is intended.
     printf("W[n+1] = ");            
     disp_bin(P ,3, prec, stdout);
     printf("\n");     
     // Recurrence
     P = radix * P - q * D;
     Q = Q + q*scale;
     printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P); 
     printf("i = %d, q = %d", i, q);
     printf(", Q = ");
     disp_bin(Q, 3, prec, stdout);
     printf(", W = ");
     disp_bin(P, 3, prec, stdout);
     printf("\n\n");
   }
   // A negative final residual means the quotient is one ulp too large
   if (P < 0) {
     Q = Q - scale;
     P = P + D;
     printf("\nCorrecting Negative Remainder\n");
     printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
     printf("Q = ");
     disp_bin(Q, 3, prec, stdout);
     printf(", W = ");
     disp_bin(P, 3, prec, stdout);
     printf("\n");
   }
   // Output Results
   RQ = N/D;
   // Since q_{computed} = q / radix, multiply by radix
   RD = Q * radix;
   printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
   printf("true = ");
   disp_bin(RQ, 3, prec, stdout);
   printf(", computed = ");
   disp_bin(RD, 3, prec, stdout);
   printf("\n\n");
   printf("REM = %1.18lf \n", P);
   printf("REM = ");
   disp_bin(P, 3, prec, stdout);
   printf("\n\n");
   return 0;
 }
--- a/pipelined/srt/stine/srt4_pd.m
+++ b/pipelined/srt/stine/srt4_pd.m
@ -0,0 +1,508 @@
 %
 % PD Region for Np   = 3;  Nd   = 4;
 % w/CPA
 %
 % Clear all workspace variables and the current figure
 clear
 clf
 % Define the number of bits (input Dividend)
 n = 4;
 %
 % Define Divisor Range
 % Normalized Floating Point [Dmin,Dmax] = [1,2]
 % Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
 % (the fixed-point range is the one used below)
 %
 Dminimum = 1.0/2;
 Dmaximum = 2.0/2;
 % Define an ulp
 ulp = 2^(-n);
 % radix = beta
 beta  = 4;
 % rho = redundancy factor -> SHOULD ALWAYS BE >= 1/2
 %
 % SD representations have alpha < beta - 1
 %
 % alpha = ceil(beta/2)  minimally redundant
 % alpha = beta -1       maximally redundant (rho = 1)
 % alpha = (beta-1)/2    nonredundant
 % alpha > beta - 1      over-redundant
 %
 rho = 2/3;
 % Largest digit magnitude: alpha = rho*(beta-1)
 alpha = rho*(beta-1);
 % q contains the digit set -alpha..alpha as a column vector
 q = [];
 for i = -alpha:alpha
  q = [q; i];
 end
 % 4r(i-1)/D values
 % Keep every later plot on the same axes, with no background grid
 hold on
 % figure(1)
 grid off
 % Robertson-diagram segments for each digit. All plot calls in this loop are
 % commented out, so it only leaves x, y, z, w and u in the workspace.
 for i = 1:length(q)
  x = -rho+q(i):ulp:rho+q(i);
  % Plot redundancy (overlap) Positive
  z = [rho+q(i),rho+q(i)];
  y = [x(length(x))-q(i),0];
  % Plot redundancy (overlap) Negative
  if (i ~= length(q))
    w = [-rho+q(i+1)-q(i+1),0];
    u = [-rho+q(i+1),-rho+q(i+1)];
    % plot(u,w,'b')
  end
  % plot(x,x-q(i))
  % plot(z,y,'r')
 end
 % title('Robertson Diagram for Radix-4 SRT Division')
 % Truncation precision, in fractional bits, of the residual (Np) and the
 % divisor (Nd) on the P-D grid, and the matching grid steps
 Np   = 3;
 Nd   = 4;
 ulpp = 2^(-Np);
 ulpd = 2^(-Nd);
 %
 % Plot Atkins P-D plot
 % Normalized Floating Point [Dmin,Dmax] = [1,2]
 % Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
 %
 Dmin = Dminimum;
 Dmax = Dmaximum;
 % One upper (blue) and one lower (red) selection bound per digit:
 % P = (q + rho)*D and P = (q - rho)*D
 D = Dmin:ulp:Dmax;
 hold on
 for i = 1:length(q)
  P1 = (rho+q(i))*D;
  P2 = (-rho+q(i))*D;
  p1 = plot(D, P1, 'Color', '#0000ff', 'LineWidth', 2.0);
  p2 = plot(D, P2, 'Color', '#ff0000', 'LineWidth', 2.0);
  axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
  xticks(D)
 end
 % Label the x and y axes with binary strings instead of decimals.
 % disp_bin is a project helper defined outside this file; presumably it
 % returns the binary text of its first argument with the given number of
 % integer and fractional bits -- TODO confirm its return type, since the
 % concatenations below depend on it.
 % x labels: one entry per divisor grid point in D
 j = [];
 for i=1:length(D)
    j = [j disp_bin(D(i), 1, 4)];
 end
 % y labels for -2.5:0.5:2.5; yk2 collects the matching numeric values
 yk = [];
 yk2 = [];
 for i=-2.5:0.5:2.5;
    yk = [yk disp_bin(i, 3, 3)];
    yk2 = [yk2 i];
 end
 xtickangle(90)
 xticklabels(j)
 % NOTE(review): yk2 is never passed to yticks(), so these labels are applied
 % to whatever y ticks the axes currently have -- confirm that is intended.
 yticklabels(yk)
 % Draw the truncation grid on the P-D plot: vertical lines every ulpd in D
 % and horizontal lines every ulpp in P, out to +/- rho*beta*Dmaximum.
 % Positive Portions
 i = 0:ulpp:rho*beta*Dmaximum;
 for j = Dmin:ulpd:Dmax
  plot(j*ones(1,length(i)),i,'k')
 end
 j = Dmin:ulpd:Dmax;
 for i = 0:ulpp:rho*beta*Dmaximum
  % ones(1,n) gives a row vector, so exactly one line is drawn per level.
  % ones(n) would build an n-by-n matrix and draw the same line n times.
  plot(j,i*ones(1,length(j)),'k')
 end
 % Negative Portions
 i = 0:-ulpp:-rho*beta*Dmaximum;
 for j = Dmin:ulpd:Dmax
  plot(j*ones(1,length(i)),i,'k')
 end
 j = Dmin:ulpd:Dmax;
 for i = 0:-ulpp:-rho*beta*Dmaximum
  plot(j,i*ones(1,length(j)),'k')
 end
 % Axis labels, nudged away from the tick labels
 xlh = xlabel('Divisor (d)');
 xlh.Position(2) = xlh.Position(2) - 0.1;
 ylh = ylabel('P = 4 \cdot w_i');
 ylh.Position(1) = ylh.Position(1)-0.02;
 % Selection constants per divisor interval (chosen by hand): m2 is the
 % boundary between digits 1 and 2, m1 between 0 and 1, m0 between -1 and 0,
 % and m1b between -2 and -1. The negative boundaries mirror the positive ones.
 m2  = [3/4 7/8 1.0 1.0 5/4 5/4 5/4 3/2 3/2];
 m1  = [1/4 1/4 1/4 1/4 1/2 1/2 1/2 1/2 1/2];
 m0  = -m1;
 m1b = -m2;
 x2 = Dmin:ulpd:Dmax;
 % Draw each boundary as a thick purple staircase over the divisor intervals
 s2  = stairs(x2, m2,  'Color', '#8f08d1', 'LineWidth', 3.0);
 s1  = stairs(x2, m1,  'Color', '#8f08d1', 'LineWidth', 3.0);
 s0  = stairs(x2, m0,  'Color', '#8f08d1', 'LineWidth', 3.0);
 s1b = stairs(x2, m1b, 'Color', '#8f08d1', 'LineWidth', 3.0);
 % Write the selected quotient digit into every cell of the P-D grid.
 % j holds the cell centres along the divisor axis (8 columns) and i the
 % label heights from top to bottom (42 rows).
 j = Dmin+ulpd/2:ulpd:Dmax;
 i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum;
 % Each row of runlen describes one divisor column, top to bottom: how many
 % consecutive cells carry the digits 2, 1, 0, -1 and -2 respectively.
 qlabels = {'2', '1', '0', '-1', '-2'};
 runlen = [15 4 4 4 15
           14 5 4 5 14
           13 6 4 6 13
           13 6 4 6 13
           11 6 8 6 11
           11 6 8 6 11
           11 6 8 6 11
            9 8 8 4 13];
 for col = 1:size(runlen,1)
  row = 0;
  for d = 1:numel(qlabels)
   for k = 1:runlen(col,d)
    row = row + 1;
    text(j(col), i(row), qlabels{d})
   end
  end
 end
 % Drop the helper variables so the workspace matches the hand-written version
 clear qlabels runlen col row d k
 print -dpng pd_cpa.png
--- a/pipelined/srt/stine/srt4_pd2.m
+++ b/pipelined/srt/stine/srt4_pd2.m
@ -0,0 +1,333 @@
 %
 % P-D (Atkins) plot for radix-4 SRT division with digit set -alpha..alpha.
 % This part of the script draws the selection-region bounds, a grid at
 % 2^-Nd (divisor) by 2^-Np (shifted partial remainder) with Np = Nd = 3,
 % the axis labels, and the hand-picked containment staircases.
 %
 % Clear all variables and screen
 clear
 clf
 % Define the number of bits (input Dividend)
 n = 4;
 %
 % Define Divisor Range
 % Normalized Floating Point [Dmin,Dmax] = [1,2]
 % Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
 %
 Dminimum = 1.0/2;
 Dmaximum = 2.0/2;
 % Define an ulp
 ulp = 2^(-n);
 % radix = beta
 beta  = 4;
 % rho = redundancy factor -> should always be >= 1/2
 %
 % SD representations have alpha < beta - 1
 %
 % alpha = ceil(beta/2)  minimally redundant
 % alpha = beta -1       maximally redundant (rho = 1)
 % alpha = (beta-1)/2    nonredundant
 % alpha > beta - 1      over-redundant
 %
 rho = 2/3;
 % Calculation of max digit set
 alpha = rho*(beta-1);
 % q contains the digit set -alpha..alpha as a column vector
 q = (-alpha:alpha).';
 % 4r(i-1)/D values (Robertson diagram).  Every plot call in this loop is
 % commented out, so it currently computes the segments but draws nothing.
 hold on
 % figure(1)
 grid off
 for i = 1:length(q)
  x = -rho+q(i):ulp:rho+q(i);
  % Plot redundancy (overlap) Positive
  z = [rho+q(i),rho+q(i)];
  y = [x(length(x))-q(i),0];
  % Plot redundancy (overlap) Negative
  if (i ~= length(q))
    w = [-rho+q(i+1)-q(i+1),0];
    u = [-rho+q(i+1),-rho+q(i+1)];
    % plot(u,w,'b')
  end
  % plot(x,x-q(i))
  % plot(z,y,'r')
 end
 % title('Robertson Diagram for Radix-4 SRT Division')
 % Grid resolution: Np bits for the partial remainder, Nd bits for the divisor
 Np   = 3;
 Nd   = 3;
 Dmin = Dminimum;
 Dmax = Dmaximum;
 ulpd = 2^(-Nd);
 ulpp = 2^(-Np);
 %
 % Plot Atkins P-D plot
 % Normalized Floating Point [Dmin,Dmax] = [1,2]
 % Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
 %
 D = Dmin:ulpd:Dmax;
 for i = 1:length(q)
  % Upper (blue) and lower (red) bound of the region where digit q(i) is valid
  P1 = (rho+q(i))*D;
  P2 = (-rho+q(i))*D;
  p1 = plot(D,P1,'b');
  p2 = plot(D,P2,'r');
  p1.LineWidth = 2.0;
  p2.LineWidth = 2.0;
 end
 % Fix the axis limits and put one x tick on every divisor grid point
 axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
 xticks(D)
 % Let's make x axis binary
 j = [];
 for i=1:length(D)
    j = [j disp_bin(D(i), 1, 3)];
 end
 % NOTE(review): yk2 collects the values that yk labels, but it is never
 % passed to yticks(); confirm that the default y ticks line up with yk.
 yk = [];
 yk2 = [];
 for i=-2.5:0.5:2.5;
    yk = [yk disp_bin(i, 3, 3)];
    yk2 = [yk2 i];
 end
 xtickangle(90)
 xticklabels(j)
 yticklabels(yk)
 % Let's draw allowed points on PD plot
 % Positive Portions
 i = 0:ulpp:rho*beta*Dmaximum;
 for j = Dmin:ulpd:Dmax
  plot(j*ones(1,length(i)),i,'k');
 end
 j = Dmin:ulpd:Dmax;
 for i = 0:ulpp:rho*beta*Dmaximum
  % ones(1,N) (a row vector) draws one line per grid row; ones(N) would
  % build an N-by-N matrix and stack N identical lines on top of each other.
  plot(j,i*ones(1,length(j)),'k');
 end
 % Negative Portions
 i = 0:-ulpp:rho*-beta*Dmaximum;
 for j = Dmin:ulpd:Dmax
  plot(j*ones(1,length(i)),i,'k');
 end
 j = Dmin:ulpd:Dmax;
 for i = 0:-ulpp:-rho*beta*Dmaximum
  plot(j,i*ones(1,length(j)),'k');
 end
 % Labels and Printing
 xlh = xlabel(['Divisor (d)']);
 xlh.Position(2) = xlh.Position(2) - 0.1;
 xlh.FontSize = 18;
 ylh = ylabel(['P = 4 \cdot w_i']);
 ylh.Position(1) = ylh.Position(1)-0.02;
 ylh.FontSize = 18;
 % Containment Values (placed manually although not bad)
 % One entry per divisor grid point in x2; m2/m1/m0/m1b are the staircase
 % levels drawn between the regions for digits 2|1, 1|0, 0|-1 and -1|-2.
 m2 = [5/6 1.0 5/4 11/8 11/8];
 m1 = [1/4 1/4 1/2 1/2 1/2];
 m0 = [-1/4 -1/4 -1/2 -1/2 -1/2];
 m1b = [-5/6 -1 -5/4 -11/8 -11/8];
 x2 = Dmin:ulpd:Dmax;
 s2 = stairs(x2, m2);
 s2.Color = '#8f08d1';
 s2.LineWidth = 3.0;
 s1 = stairs(x2, m1);
 s1.Color = '#8f08d1';
 s1.LineWidth = 3.0;
 s0 = stairs(x2, m0);
 s0.Color = '#8f08d1';
 s0.LineWidth = 3.0;
 s1b = stairs(x2, m1b);
 s1b.Color = '#8f08d1';
 s1b.LineWidth = 3.0;
 % Place manually Quotient (ugh)
 % j: centre of each divisor cell; i: label heights, top row first.
 j = Dmin+ulpd/2:ulpd:Dmax;
 i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum;
 % One entry per divisor column, expanded to one quotient digit per grid
 % row (42 rows).  NaN marks the two cells of the first column that are
 % labelled 'Full Precision' instead of a digit.
 cellq = { repelem([2 NaN 1 0 -1 NaN -2], [14 1 4 4 4 1 14]), ...
           repelem([2 1 0 -1 -2], [13 6 4 6 13]), ...
           repelem([2 1 0 -1 -2], [11 6 8 6 11]), ...
           repelem([2 1 0 -1 -2], [10 7 8 7 10]) };
 flagged = gobjects(1, 0);
 for col = 1:numel(cellq)
   colq = cellq{col};
   for row = 1:numel(colq)
     if isnan(colq(row))
       flagged(end+1) = text(j(col), i(row), 'Full Precision', 'FontSize', 16);
     else
       text(j(col), i(row), sprintf('%d', colq(row)))
     end
   end
 end
 % Keep the handles of the two annotations under their original names
 error1 = flagged(1);
 error2 = flagged(2);
 print -dpng pd_bad.png
--- a/pipelined/srt/stine/srt4_pd3.m
+++ b/pipelined/srt/stine/srt4_pd3.m
@ -0,0 +1,855 @@
 %
 % P-D (Atkins) plot for radix-4 SRT division with digit set -alpha..alpha.
 % This part of the script draws the selection-region bounds, a grid at
 % 2^-Nd (divisor) by 2^-Np (shifted partial remainder) with Np = Nd = 4,
 % the axis labels, and the hand-picked containment staircases.
 %
 % Clear all variables and screen
 clear
 clf
 % Define the number of bits (input Dividend)
 n = 4;
 %
 % Define Divisor Range
 % Normalized Floating Point [Dmin,Dmax] = [1,2]
 % Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
 %
 Dminimum = 1.0/2;
 Dmaximum = 2.0/2;
 % Define an ulp
 ulp = 2^(-n);
 % radix = beta
 beta  = 4;
 % rho = redundancy factor -> should always be >= 1/2
 %
 % SD representations have alpha < beta - 1
 %
 % alpha = ceil(beta/2)  minimally redundant
 % alpha = beta -1       maximally redundant (rho = 1)
 % alpha = (beta-1)/2    nonredundant
 % alpha > beta - 1      over-redundant
 %
 rho = 2/3;
 % Calculation of max digit set
 alpha = rho*(beta-1);
 % q contains the digit set -alpha..alpha as a column vector
 q = (-alpha:alpha).';
 % 4r(i-1)/D values (Robertson diagram).  Every plot call in this loop is
 % commented out, so it currently computes the segments but draws nothing.
 hold on
 % figure(1)
 grid off
 for i = 1:length(q)
  x = -rho+q(i):ulp:rho+q(i);
  % Plot redundancy (overlap) Positive
  z = [rho+q(i),rho+q(i)];
  y = [x(length(x))-q(i),0];
  % Plot redundancy (overlap) Negative
  if (i ~= length(q))
    w = [-rho+q(i+1)-q(i+1),0];
    u = [-rho+q(i+1),-rho+q(i+1)];
    % plot(u,w,'b')
  end
  % plot(x,x-q(i))
  % plot(z,y,'r')
 end
 % title('Robertson Diagram for Radix-4 SRT Division')
 %
 % Plot Atkins P-D plot
 % Normalized Floating Point [Dmin,Dmax] = [1,2]
 % Normalized Fixed Point    [Dmin, Dmax] =[1/2,1]
 %
 Dmin = Dminimum;
 Dmax = Dmaximum;
 D = Dmin:ulp:Dmax;
 for i = 1:length(q)
  % Upper (blue) and lower (red) bound of the region where digit q(i) is valid
  P1 = (rho+q(i))*D;
  P2 = (-rho+q(i))*D;
  p1 = plot(D,P1,'b');
  p2 = plot(D,P2,'r');
  p1.LineWidth = 2.0;
  p2.LineWidth = 2.0;
 end
 % Fix the axis limits and put one x tick on every divisor grid point
 axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
 xticks(D)
 % Let's make x axis binary
 j = [];
 for i=1:length(D)
    j = [j disp_bin(D(i), 1, 4)];
 end
 % NOTE(review): yk2 collects the values that yk labels, but it is never
 % passed to yticks(); confirm that the default y ticks line up with yk.
 yk = [];
 yk2 = [];
 for i=-2.5:0.5:2.5;
    yk = [yk disp_bin(i, 3, 4)];
    yk2 = [yk2 i];
 end
 xtickangle(90)
 xticklabels(j)
 yticklabels(yk)
 % Grid resolution: Np bits for the partial remainder, Nd bits for the divisor
 Np   = 4;
 Nd   = 4;
 ulpd = 2^(-Nd);
 ulpp = 2^(-Np);
 % Let's draw allowed points on PD plot
 % Positive Portions
 i = 0:ulpp:rho*beta*Dmaximum;
 for j = Dmin:ulpd:Dmax
  plot(j*ones(1,length(i)),i,'k');
 end
 j = Dmin:ulpd:Dmax;
 for i = 0:ulpp:rho*beta*Dmaximum
  % ones(1,N) (a row vector) draws one line per grid row; ones(N) would
  % build an N-by-N matrix and stack N identical lines on top of each other.
  plot(j,i*ones(1,length(j)),'k');
 end
 % Negative Portions
 i = 0:-ulpp:rho*-beta*Dmaximum;
 for j = Dmin:ulpd:Dmax
  plot(j*ones(1,length(i)),i,'k');
 end
 j = Dmin:ulpd:Dmax;
 for i = 0:-ulpp:-rho*beta*Dmaximum
  plot(j,i*ones(1,length(j)),'k');
 end
 % Labels and Printing
 xlh = xlabel(['Divisor (d)']);
 xlh.Position(2) = xlh.Position(2) - 0.1;
 %xlh.FontSize = 18;
 ylh = ylabel(['P = 4 \cdot w_i']);
 ylh.Position(1) = ylh.Position(1)-0.02;
 %ylh.FontSize = 18;
 % Containment Values (placed manually although not bad)
 % One entry per divisor grid point in x2; m2/m1/m0/m1b are the staircase
 % levels drawn between the regions for digits 2|1, 1|0, 0|-1 and -1|-2.
 m2 = [3/4 7/8 15/16 1.0 9/8 19/16 5/4 6/4 6/4];
 m1 = [1/4 1/4 1/4 1/4 3/8 3/8 1/2 1/2 1/2];
 m0 = [-1/4 -3/8 -3/8 -3/8 -1/2 -1/2 -1/2 -1/2 -1/2];
 m1b = [-13/16 -15/16 -1 -9/8 -5/4 -5/4 -11/8 -6/4 -6/4];
 x2 = Dmin:ulpd:Dmax;
 s2 = stairs(x2, m2);
 s2.Color = '#8f08d1';
 s2.LineWidth = 3.0;
 s1 = stairs(x2, m1);
 s1.Color = '#8f08d1';
 s1.LineWidth = 3.0;
 s0 = stairs(x2, m0);
 s0.Color = '#8f08d1';
 s0.LineWidth = 3.0;
 s1b = stairs(x2, m1b);
 s1b.Color = '#8f08d1';
 s1b.LineWidth = 3.0;
 % Place manually Quotient (ugh)
 % j: centre of each divisor cell; i: label heights, top row first.
 j = Dmin+ulpd/2:ulpd:Dmax;
 i = rho*beta*Dmaximum-ulpp:-ulpp:-rho*beta*Dmaximum;
 % Row c of runlen describes divisor column c: how many consecutive grid
 % rows (from the top) carry each digit of qdigits.  Every row sums to 84.
 qdigits = [2 1 0 -1 -2];
 runlen = [30  8  8  9 29; ...   % 1
           28 10 10  9 27; ...   % 2
           27 11 10 10 26; ...   % 3
           26 12 10 12 24; ...   % 4
           24 12 14 12 22; ...   % 5
           23 13 14 12 22; ...   % 6
           22 12 16 14 20; ...   % 7
           18 16 16 16 18];      % 8
 for col = 1:size(runlen, 1)
   colq = repelem(qdigits, runlen(col, :));
   for row = 1:numel(colq)
     text(j(col), i(row), sprintf('%d', colq(row)))
   end
 end
 orient('landscape')
 print -dpng 'pd_csa.png'
--- a/pipelined/srt/stine/srt4div
+++ b/pipelined/srt/stine/srt4div
--- a/pipelined/srt/stine/srt4div.c
+++ b/pipelined/srt/stine/srt4div.c
@ -0,0 +1,226 @@
 #include "disp.h"
 #include <math.h>
 // QSLC (quotient-digit selection) for division by recurrence,
 // r=4 using a CPA - See Table 5.9 EL
 //
 // prem : shifted partial remainder, scaled by the caller
 //        (main passes flr((radix * P) * 8, 3)).
 // d    : divisor scaled by 16; rows exist only for 8 <= d < 16.
 //
 // Returns the selected quotient digit in {-2, -1, 0, 1, 2}.  When d lies
 // outside [8, 16) no table row applies and 0 is returned, instead of
 // falling off the end of the function without a return value.
 int qslc (double prem, double d) {
   // Selection constants per unit-wide divisor interval: row k covers
   // 8+k <= d < 9+k.  m2[k] is the smallest prem that selects q = 2 and
   // m1[k] the smallest that selects q = 1; the negative side mirrors
   // them (q = 0 down to -m1[k], q = -1 down to -m2[k]).
   static const double m2[8] = { 6.0, 7.0, 8.0, 8.0, 10.0, 10.0, 10.0, 12.0 };
   static const double m1[8] = { 2.0, 2.0, 2.0, 2.0,  4.0,  4.0,  4.0,  4.0 };
   int k;

   // For Debugging
   printf("d  --> %lg\n", d);
   printf("rw --> %lg\n", prem);

   // Written as a negated range test so that NaN is rejected as well.
   if (!(d >= 8.0 && d < 16.0))
     return 0;

   k = (int)d - 8;   // truncation is a floor here because d is positive

   if (prem >= m2[k])
     return 2;
   if (prem >= m1[k])
     return 1;
   if (prem >= -m1[k])
     return 0;
   // The 9 <= d < 10 row used to test prem >= 7.0 here, which can never be
   // true once prem >= -2.0 has failed, so q = -1 was unreachable for it.
   if (prem >= -m2[k])
     return -1;
   return -2;
 }
 /*
 This routine performs a radix-4 SRT division
 algorithm.  The user inputs the numerator, the denominator,
 the number of iterations, and the precision used for rounding and
 display.  It assumes that 0.5 <= D < 1; a denominator outside that
 range (after rounding) is rejected because qslc() has no table row
 for it.  Exits with status 1 on a usage or argument error, 0 otherwise.
 */
 int main(int argc, char* argv[]) {
   double P, N, D, Q, RQ, RD, scale;
   int q;
   int num_iter, i;
   int prec;
   int radix = 4;

   if (argc < 5) {
      fprintf(stderr,
              "Usage: %s numerator denominator num_iterations prec\n",
              argv[0]);
      exit(1);
   }
   // A failed conversion leaves the target variable unset, so every
   // sscanf result is checked before the values are used.
   if (sscanf(argv[1],"%lg", &N) != 1 ||
       sscanf(argv[2],"%lg", &D) != 1 ||
       sscanf(argv[3],"%d", &num_iter) != 1 ||
       sscanf(argv[4],"%d", &prec) != 1) {
      fprintf(stderr, "%s: could not parse the arguments\n", argv[0]);
      fprintf(stderr,
              "Usage: %s numerator denominator num_iterations prec\n",
              argv[0]);
      exit(1);
   }

   // Round to precision
   N = rne(N, prec);
   D = rne(D, prec);

   // qslc() is called with D*16 and only covers 8 <= D*16 < 16.
   if (!(D >= 0.5 && D < 1.0)) {
      fprintf(stderr,
              "%s: denominator %lg is outside [0.5, 1) after rounding\n",
              argv[0], D);
      exit(1);
   }

   printf("N = ");
   disp_bin(N, 3, prec, stdout);
   printf("\n");
   printf("D = ");
   disp_bin(D, 3, prec, stdout);
   printf("\n");

   Q = 0;
   // Initial partial remainder is N / radix
   P = N * pow(2.0, -log2(radix));
   printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n", 
          N, D, N/D, num_iter); 
   for (scale = 1, i = 0; i < num_iter; i++) {
     // Shift by r
     scale = scale * pow(2.0, -log2(radix));
     // (4*P)*8 because of footnote in Table 5.9, page 296 EL
     // i.e., real value = shown value / 8
     // D*16 since we use 4 bits of D (1 bit known)
     q = qslc(flr((radix * P) * 8, 3), D*16);
     printf("4*W[n] = ");
     disp_bin(radix*P, 3, prec, stdout);
     printf("\n");
     printf("q*D = ");      
     disp_bin(q*D, 3, prec, stdout);
     printf("\n");
     // NOTE(review): P has not been updated yet at this point, so the value
     // printed under the "W[n+1]" label is the remainder from before the
     // recurrence step; confirm which of label or value is intended.
     printf("W[n+1] = ");            
     disp_bin(P ,3, prec, stdout);
     printf("\n");
     // Recurrence
     P = radix * P - q * D;
     // OTFC
     Q = Q + q * scale;
     printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P); 
     printf("i = %d, q = %d", i, q);
     printf(", Q = ");
     disp_bin(Q, 3, prec, stdout);
     printf(", W = ");
     disp_bin(P, 3, prec, stdout);
     printf("\n\n");
   }
   // Is shifted partial remainder negative?
   if (P < 0) {
     Q = Q - pow(2.0, -prec);
     P = P + D;
     printf("\nCorrecting Negative Remainder\n"); 
     printf("Q = %1.18lf, W = %1.18lf\n", Q, P); 
     printf("Q = ");
     disp_bin(Q, 3, prec, stdout);
     printf(", W = ");
     disp_bin(P, 3, prec, stdout);
     printf("\n");
   }
   // Output Results
   RQ = flr(N/D, prec);
   // Since q_{computed} = q / radix, multiply by radix
   RD = Q * radix;
   printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
   printf("true = ");
   disp_bin(RQ, 3, prec, stdout);
   printf(", computed = ");
   disp_bin(RD, 3, prec, stdout);
   printf("\n\n");
   printf("REM = %1.18lf \n", P);
   printf("REM = ");
   disp_bin(P, 3, prec, stdout);
   printf("\n\n");
   return 0;
 }
--- a/pipelined/srt/stine/test_iter128.sv
+++ b/pipelined/srt/stine/test_iter128.sv
@ -0,0 +1,79 @@
 `include "idiv-config.vh"
 // Random-vector testbench for a 128-bit intdiv instance with S held at 0.
 // Each vector applies random N and D, waits a fixed number of clock cycles,
 // compares the DUT quotient with the simulator's own N/D and appends one
 // line to iter128_unsigned.out.
 // NOTE(review): the DUT ports are connected by position; the order is assumed
 // to match intdiv's declaration -- confirm against the intdiv source.
 module tb;
   logic [127:0]  N, D;
   logic 	  clk;
   logic 	  reset;   
   logic 	  start;
   // Driven to 0 below; the log file name suggests this selects unsigned
   // operation -- confirm against intdiv.
   logic 	  S;   
   logic [127:0]   Q;
   logic [127:0]  rem0;
   logic 	 div0;
   logic 	 done;
   integer 	 handle3;
   integer 	 desc3;
   integer 	 i;   
   logic [127:0]  Ncomp;
   logic [127:0]  Dcomp;
   logic [127:0]  Qcomp;
   logic [127:0]  Rcomp;   
   logic [31:0]  vectornum;
   logic [31:0]  errors;   
   intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
   // Free-running clock: toggles every 5 time units.
   initial 
     begin	
 	clk = 1'b0;
 	forever #5 clk = ~clk;
     end
   // Clear the counters and open the log file.
   initial
     begin
 	vectornum = 0;
 	errors = 0;	
 	handle3 = $fopen("iter128_unsigned.out");
     end
   // Stimulus sequence.  Although written as an always block it runs only
   // once: reset is driven nowhere else, so the block is first entered on a
   // rising clock edge, and it ends in $finish.
   always @(posedge clk, posedge reset)
     begin
 	desc3 = handle3;	
 	#0  start = 1'b0;
 	#0  S = 1'b0;	
 	// Hold reset high for 30 time units, release it, then wait 30 more
 	// before zeroing the operands.
 	#0  reset = 1'b1;
 	#30 reset = 1'b0;
 	#30 N = 128'h0;
 	#0  D = 128'h0;	
 	// `IDIV_TESTS is not defined in this file; presumably it comes from
 	// idiv-config.vh.
 	for (i=0; i<`IDIV_TESTS; i=i+1)
 	  begin
 	     // Four 32-bit $urandom() results are concatenated per operand.
 	     // NOTE(review): D is not screened for zero.
 	     N = {$urandom(), $urandom(), $urandom(), $urandom()};
 	     D = {$urandom(), $urandom(), $urandom(), $urandom()};
 	     start <= 1'b1;
 	     // Wait 2 cycles (to be sure)
 	     repeat (2)
 	       @(posedge clk);
 	     start <= 1'b0;	     
 	     // Fixed 41-cycle wait for the result; 'done' is not consulted.
 	     repeat (41)
 	       @(posedge clk);
 	     // Reference quotient and remainder from the simulator's / and %.
 	     Ncomp = N;
 	     Dcomp = D;
 	     Qcomp = Ncomp/Dcomp;
 	     Rcomp = Ncomp%Dcomp;
 	     vectornum = vectornum + 1;
 	     // Only a quotient mismatch counts as an error (!== also flags
 	     // x/z bits in Q).  The remainder comparison is logged below but
 	     // does not affect the error count.
 	       if ((Q !== Qcomp)) begin
 	       errors = errors + 1;
 	     end
 	     // Log line: N D Q rem0 || expected Q, expected remainder ||
 	     // quotient-match remainder-match
 	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
 		       N, D, Q, rem0, Qcomp, Rcomp, 
 		       (Q==Qcomp), (rem0==Rcomp));
 	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
 	$display("%d tests completed, %d errors", vectornum, errors);
 	$finish;	
     end 
 endmodule // tb
--- a/pipelined/srt/stine/test_iter128S.sv
+++ b/pipelined/srt/stine/test_iter128S.sv
@ -0,0 +1,90 @@
 `include "idiv-config.vh"
 // Random-vector testbench for a 128-bit intdiv instance with S held at 1.
 // Each vector applies random N and D, waits a fixed number of clock cycles,
 // compares the DUT quotient with the simulator's own signed N/D and appends
 // one line to iter128_signed.out.
 // NOTE(review): the DUT ports are connected by position; the order is assumed
 // to match intdiv's declaration -- confirm against the intdiv source.
 module tb;
   logic [127:0]  N, D;
   logic 	  clk;
   logic 	  reset;   
   logic 	  start;
   // Driven to 1 below; the log file name suggests this selects signed
   // operation -- confirm against intdiv.
   logic 	  S;   
   logic [127:0]   Q;
   logic [127:0]  rem0;
   logic 	 div0;
   logic 	 done;
   integer 	 handle3;
   integer 	 desc3;
   integer 	 i;   
   // rnd1 and rnd2 are declared but not referenced anywhere in this module.
   logic [31:0]  rnd1;
   logic [31:0]  rnd2;            
   logic [127:0] Ncomp;
   logic [127:0] Dcomp;
   logic [127:0] Qcomp;
   logic [127:0] Rcomp;
   logic [31:0]  vectornum;
   logic [31:0]  errors;   
   intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
   // Free-running clock: toggles every 5 time units.
   initial 
     begin	
 	clk = 1'b0;
 	forever #5 clk = ~clk;
     end
   // Clear the counters and open the log file.
   initial
     begin
 	vectornum = 0;
 	errors = 0;	
 	handle3 = $fopen("iter128_signed.out");
     end
   /*
   // VCD generation for power estimation
   initial
     begin
        $dumpfile("iter128_signed.vcd");
 	$dumpvars (0,tb.dut);	
     end
    */      
   // Stimulus sequence.  Although written as an always block it runs only
   // once: reset is driven nowhere else, so the block is first entered on a
   // rising clock edge, and it ends in $finish.
   always @(posedge clk, posedge reset)
     begin
 	desc3 = handle3;	
 	#0  start = 1'b0;
 	#0  S = 1'b1;	
 	// Hold reset high for 30 time units, release it, then wait 30 more
 	// before zeroing the operands.
 	#0  reset = 1'b1;
 	#30 reset = 1'b0;
 	#30 N = 128'h0;
 	#0  D = 128'h0;	
 	// `IDIV_TESTS is not defined in this file; presumably it comes from
 	// idiv-config.vh.
 	for (i=0; i<`IDIV_TESTS; i=i+1)
 	  begin
 	     // Four 32-bit $urandom() results are concatenated per operand.
 	     // NOTE(review): D is not screened for zero.
 	     N = {$urandom(), $urandom(), $urandom(), $urandom()};
 	     D = {$urandom(), $urandom(), $urandom(), $urandom()};		
 	     start <= 1'b1;
 	     // Wait 1 cycle with start asserted
 	     repeat (1)
 	       @(posedge clk);
 	     start <= 1'b0;	     
 	     // Fixed 65-cycle wait for the result; 'done' is not consulted.
 	     repeat (65)
 	       @(posedge clk);
 	     // Reference quotient and remainder: the operands are
 	     // reinterpreted as signed before / and % are applied.
 	     Ncomp = N;
 	     Dcomp = D;
 	     Qcomp = $signed(Ncomp)/$signed(Dcomp);
 	     Rcomp = $signed(Ncomp)%$signed(Dcomp);	     
 	     vectornum = vectornum + 1;
 	     // Only a quotient mismatch counts as an error (!== also flags
 	     // x/z bits in Q).  The remainder comparison is logged below but
 	     // does not affect the error count.
 	       if ((Q !== Qcomp)) begin
 	       errors = errors + 1;
 	     end
 	     // Log line: N D Q rem0 || expected Q, expected remainder ||
 	     // quotient-match remainder-match
 	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
 		       N, D, Q, rem0, Qcomp, Rcomp, 
 		       (Q==Qcomp), (rem0==Rcomp));
 	  end 
 	$display("%d tests completed, %d errors", vectornum, errors);
 	$finish;	
     end 
 endmodule // tb
--- a/pipelined/srt/stine/test_iter32.sv
+++ b/pipelined/srt/stine/test_iter32.sv
@ -0,0 +1,85 @@
 `include "idiv-config.vh"
 // Random-vector testbench for a 32-bit intdiv instance with S held at 0.
 // Each vector applies random N and D, waits a fixed number of clock cycles,
 // compares the DUT quotient with the simulator's own N/D and appends one
 // line to iter32_unsigned.out.
 // NOTE(review): the DUT ports are connected by position; the order is assumed
 // to match intdiv's declaration -- confirm against the intdiv source.
 module tb;
   logic [31:0]  N, D;
   logic 	 clk;
   logic 	 reset;   
   logic 	 start;
   // Driven to 0 below; the log file name suggests this selects unsigned
   // operation -- confirm against intdiv.
   logic 	 S;   
   logic [31:0]  Q;
   logic [31:0]  rem0;
   logic 	 div0;
   logic 	 done;
   integer 	 handle3;
   integer 	 desc3;
   integer 	 i;   
   logic [31:0]  Ncomp;
   logic [31:0]  Dcomp;
   logic [31:0]  Qcomp;
   logic [31:0]  Rcomp;   
   logic [31:0]  vectornum;     
   logic [31:0]  errors;   
   intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
   // Free-running clock: toggles every 5 time units.
   initial 
     begin	
 	clk = 1'b0;
 	forever #5 clk = ~clk;
     end
   // Clear the counters and open the log file.
   initial
     begin
 	vectornum = 0;
 	errors = 0;	
 	handle3 = $fopen("iter32_unsigned.out");
     end
   // Stimulus sequence.  Although written as an always block it runs only
   // once: reset is driven nowhere else, so the block is first entered on a
   // rising clock edge, and it ends in $finish.
   always @(posedge clk, posedge reset)
     begin
 	desc3 = handle3;	
 	#0  start = 1'b0;
 	#0  S = 1'b0;	
 	// Hold reset high for 30 time units, release it, then wait 30 more
 	// before zeroing the operands.
 	#0  reset = 1'b1;
 	#30 reset = 1'b0;
 	#30 N = 32'h0;
 	#0  D = 32'h0;		
 	// `IDIV_TESTS is not defined in this file; presumably it comes from
 	// idiv-config.vh.
 	for (i=0; i<`IDIV_TESTS; i=i+1)
 	  begin
 	     // NOTE(review): D is not screened for zero.
 	     N = $urandom;
 	     D = $urandom;
 	     start <= 1'b1;
 	     // Wait 2 cycles (to be sure)
 	     repeat (2)
 	       @(posedge clk);
 	     start <= 1'b0;
 	     // Fixed 41-cycle wait for the result; 'done' is not consulted.
 	     repeat (41)
 	       @(posedge clk);
 	     // Reference quotient and remainder from the simulator's / and %.
 	     Ncomp = N;
 	     Dcomp = D;
 	     Qcomp = Ncomp/Dcomp;
 	     Rcomp = Ncomp%Dcomp;
 	     // Only a quotient mismatch counts as an error (!== also flags
 	     // x/z bits in Q).  The remainder comparison is logged below but
 	     // does not affect the error count.
 	     if ((Q !== Qcomp)) begin
 	       errors = errors + 1;
 	     end
 	     vectornum = vectornum + 1;	     
 	     // Log line: N D Q rem0 || expected Q, expected remainder ||
 	     // quotient-match remainder-match
 	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
 		       N, D, Q, rem0, Qcomp, Rcomp, 
 		       (Q==Qcomp), (rem0==Rcomp));
 	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
 	$display("%d tests completed, %d errors", vectornum, errors);
 	$finish;	
     end 
 endmodule // tb
--- a/pipelined/srt/stine/test_iter32S.sv
+++ b/pipelined/srt/stine/test_iter32S.sv
@ -0,0 +1,79 @@
 `include "idiv-config.vh"
 // Random-vector testbench for a 32-bit intdiv instance with S held at 1.
 // Each vector applies random N and D, waits a fixed number of clock cycles,
 // compares the DUT quotient with the simulator's own signed N/D and appends
 // one line to iter32_signed.out.
 // NOTE(review): the DUT ports are connected by position; the order is assumed
 // to match intdiv's declaration -- confirm against the intdiv source.
 module tb;
   logic [31:0]  N, D;
   logic 	 clk;
   logic 	 reset;   
   logic 	 start;
   // Driven to 1 below; the log file name suggests this selects signed
   // operation -- confirm against intdiv.
   logic 	 S;   
   logic [31:0]  Q;
   logic [31:0]  rem0;
   logic 	 div0;
   logic 	 done;
   integer 	 handle3;
   integer 	 desc3;
   integer 	 i;   
   logic [31:0]  Ncomp;
   logic [31:0]  Dcomp;
   logic [31:0]  Qcomp;
   logic [31:0]  Rcomp;   
   logic [31:0]  vectornum;
   logic [31:0]  errors;   
   intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
   // Free-running clock: toggles every 5 time units.
   initial 
     begin	
 	clk = 1'b0;
 	forever #5 clk = ~clk;
     end
   // Clear the counters and open the log file.
   initial
     begin
 	vectornum = 0;
 	errors = 0;	
 	handle3 = $fopen("iter32_signed.out");
     end
   // Stimulus sequence.  Although written as an always block it runs only
   // once: reset is driven nowhere else, so the block is first entered on a
   // rising clock edge, and it ends in $finish.
   always @(posedge clk, posedge reset)
     begin
 	desc3 = handle3;	
 	#0  start = 1'b0;
 	#0  S = 1'b1;	
 	// Hold reset high for 30 time units, release it, then wait 30 more
 	// before zeroing the operands.
 	#0  reset = 1'b1;
 	#30 reset = 1'b0;
 	#30 N = 32'h0;
 	#0  D = 32'h0;	
 	// `IDIV_TESTS is not defined in this file; presumably it comes from
 	// idiv-config.vh.
 	for (i=0; i<`IDIV_TESTS; i=i+1)
 	  begin
 	     // NOTE(review): D is not screened for zero.
 	     N = $urandom;
 	     D = $urandom;
 	     start <= 1'b1;
 	     // Wait 2 cycles (to be sure)
 	     repeat (2)
 	       @(posedge clk);
 	     start <= 1'b0;
 	     // Fixed 41-cycle wait for the result; 'done' is not consulted.
 	     repeat (41)
 	       @(posedge clk);
 	     // Reference quotient and remainder: the operands are
 	     // reinterpreted as signed before / and % are applied.
 	     Ncomp = N;
 	     Dcomp = D;
 	     Qcomp = $signed(Ncomp)/$signed(Dcomp);
 	     Rcomp = $signed(Ncomp)%$signed(Dcomp);
 	     // Only a quotient mismatch counts as an error (!== also flags
 	     // x/z bits in Q).  The remainder comparison is logged below but
 	     // does not affect the error count.
 	       if ((Q !== Qcomp)) begin
 	       errors = errors + 1;
 	     end
 	     vectornum = vectornum + 1;	     
 	     // Log line: N D Q rem0 || expected Q, expected remainder ||
 	     // quotient-match remainder-match
 	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
 		       N, D, Q, rem0, Qcomp, Rcomp, 
 		       (Q==Qcomp), (rem0==Rcomp));
 	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
 	$display("%d tests completed, %d errors", vectornum, errors);	
 	$finish;	
     end 
 endmodule // tb
--- a/pipelined/srt/stine/test_iter64.sv
+++ b/pipelined/srt/stine/test_iter64.sv
@ -0,0 +1,79 @@
 `include "idiv-config.vh"
 // Random-vector testbench for a 64-bit intdiv instance with S held at 0.
 // Each vector applies random N and D, waits a fixed number of clock cycles,
 // compares the DUT quotient with the simulator's own N/D and appends one
 // line to iter64_unsigned.out.
 // NOTE(review): the DUT ports are connected by position; the order is assumed
 // to match intdiv's declaration -- confirm against the intdiv source.
 module tb;
   logic [63:0]  N, D;
   logic 	 clk;
   logic 	 reset;   
   logic 	 start;
   // Driven to 0 below; the log file name suggests this selects unsigned
   // operation -- confirm against intdiv.
   logic 	 S;   
   logic [63:0]  Q;
   logic [63:0]  rem0;
   logic 	 div0;
   logic 	 done;
   integer 	 handle3;
   integer 	 desc3;
   integer 	 i;   
   logic [63:0]  Ncomp;
   logic [63:0]  Dcomp;
   logic [63:0]  Qcomp;
   logic [63:0]  Rcomp;   
   logic [31:0]  vectornum;
   logic [31:0]  errors;   
   intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
   // Free-running clock: toggles every 5 time units.
   initial 
     begin	
 	clk = 1'b0;
 	forever #5 clk = ~clk;
     end
   // Clear the counters and open the log file.
   initial
     begin
 	vectornum = 0;
 	errors = 0;	
 	handle3 = $fopen("iter64_unsigned.out");
     end
   // Stimulus sequence.  Although written as an always block it runs only
   // once: reset is driven nowhere else, so the block is first entered on a
   // rising clock edge, and it ends in $finish.
   always @(posedge clk, posedge reset)
     begin
 	desc3 = handle3;	
 	#0  start = 1'b0;
 	#0  S = 1'b0;	
 	// Hold reset high for 30 time units, release it, then wait 30 more
 	// before zeroing the operands.
 	#0  reset = 1'b1;
 	#30 reset = 1'b0;
 	#30 N = 64'h0;
 	#0  D = 64'h0;	
 	// `IDIV_TESTS is not defined in this file; presumably it comes from
 	// idiv-config.vh.
 	for (i=0; i<`IDIV_TESTS; i=i+1)
 	  begin
 	     // Two 32-bit $urandom() results are concatenated per operand.
 	     // NOTE(review): D is not screened for zero.
 	     N = {$urandom(), $urandom()};
 	     D = {$urandom(), $urandom()};
 	     start <= 1'b1;
 	     // Wait 2 cycles (to be sure)
 	     repeat (2)
 	       @(posedge clk);
 	     start <= 1'b0;	     
 	     // Fixed 41-cycle wait for the result; 'done' is not consulted.
 	     repeat (41)
 	       @(posedge clk);
 	     // Reference quotient and remainder from the simulator's / and %.
 	     Ncomp = N;
 	     Dcomp = D;
 	     Qcomp = Ncomp/Dcomp;
 	     Rcomp = Ncomp%Dcomp;
 	     vectornum = vectornum + 1;
 	     // Only a quotient mismatch counts as an error (!== also flags
 	     // x/z bits in Q).  The remainder comparison is logged below but
 	     // does not affect the error count.
 	       if ((Q !== Qcomp)) begin
 	       errors = errors + 1;
 	     end
 	     // Log line: N D Q rem0 || expected Q, expected remainder ||
 	     // quotient-match remainder-match
 	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
 		       N, D, Q, rem0, Qcomp, Rcomp, 
 		       (Q==Qcomp), (rem0==Rcomp));
 	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
 	$display("%d tests completed, %d errors", vectornum, errors);
 	$finish;	
     end 
 endmodule // tb
--- a/pipelined/srt/stine/test_iter64S.sv
+++ b/pipelined/srt/stine/test_iter64S.sv
@ -0,0 +1,79 @@
 `include "idiv-config.vh"
 // Random-vector testbench for a 64-bit intdiv instance with S held at 1.
 // Each vector applies random N and D, waits a fixed number of clock cycles,
 // compares the DUT quotient with the simulator's own signed N/D and appends
 // one line to iter64_signed.out.
 // NOTE(review): the DUT ports are connected by position; the order is assumed
 // to match intdiv's declaration -- confirm against the intdiv source.
 module tb;
   logic [63:0]  N, D;
   logic 	 clk;
   logic 	 reset;   
   logic 	 start;
   // Driven to 1 below; the log file name suggests this selects signed
   // operation -- confirm against intdiv.
   logic 	 S;   
   logic [63:0]  Q;
   logic [63:0]  rem0;
   logic 	 div0;
   logic 	 done;
   integer 	 handle3;
   integer 	 desc3;
   integer 	 i;   
   logic [63:0]  Ncomp;
   logic [63:0]  Dcomp;
   logic [63:0]  Qcomp;
   logic [63:0]  Rcomp;
   logic [31:0]  vectornum;
   logic [31:0]  errors;   
   intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S);
   // Free-running clock: toggles every 5 time units.
   initial 
     begin	
 	clk = 1'b0;
 	forever #5 clk = ~clk;
     end
   // Clear the counters and open the log file.
   initial
     begin
 	vectornum = 0;
 	errors = 0;
 	handle3 = $fopen("iter64_signed.out");
     end
   // Stimulus sequence.  Although written as an always block it runs only
   // once: reset is driven nowhere else, so the block is first entered on a
   // rising clock edge, and it ends in $finish.
   always @(posedge clk, posedge reset)
     begin
 	desc3 = handle3;	
 	#0  start = 1'b0;
 	#0  S = 1'b1;	
 	// Hold reset high for 30 time units, release it, then wait 30 more
 	// before zeroing the operands.
 	#0  reset = 1'b1;
 	#30 reset = 1'b0;
 	#30 N = 64'h0;
 	#0  D = 64'h0;	
 	// `IDIV_TESTS is not defined in this file; presumably it comes from
 	// idiv-config.vh.
 	for (i=0; i<`IDIV_TESTS; i=i+1)
 	  begin
 	     // Two 32-bit $urandom() results are concatenated per operand.
 	     // NOTE(review): D is not screened for zero.
 	     N = {$urandom(), $urandom()};
 	     D = {$urandom(), $urandom()};	     
 	     start <= 1'b1;
 	     // Wait 2 cycles (to be sure)
 	     repeat (2)
 	       @(posedge clk);
 	     start <= 1'b0;	     
 	     // Fixed 41-cycle wait for the result; 'done' is not consulted.
 	     repeat (41)
 	       @(posedge clk);
 	     // Reference quotient and remainder: the operands are
 	     // reinterpreted as signed before / and % are applied.
 	     Ncomp = N;
 	     Dcomp = D;
 	     Qcomp = $signed(Ncomp)/$signed(Dcomp);
 	     Rcomp = $signed(Ncomp)%$signed(Dcomp);
 	     // Only a quotient mismatch counts as an error (!== also flags
 	     // x/z bits in Q).  The remainder comparison is logged below but
 	     // does not affect the error count.
 	     if ((Q !== Qcomp)) begin
 	       errors = errors + 1;
 	     end
 	     vectornum = vectornum + 1;	     
 	     // Log line: N D Q rem0 || expected Q, expected remainder ||
 	     // quotient-match remainder-match
 	     $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", 
 		       N, D, Q, rem0, Qcomp, Rcomp, 
 		       (Q==Qcomp), (rem0==Rcomp));
 	  end // for (i=0; i<`IDIV_TESTS; i=i+1)
 	$display("%d tests completed, %d errors", vectornum, errors);
 	$finish;
     end 
 endmodule // tb
--- a/pipelined/srt/stine/tmp
+++ b/pipelined/srt/stine/tmp
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@ -0,0 +1,219 @@
 `define DIVLEN 64
 /////////////
 // counter //
 /////////////
 // module counter(input  logic clk, 
 //                input  logic req, 
 //                output logic done);
 //    logic    [7:0]  count;
 //   // This block of control logic sequences the divider
 //   // through its iterations.  You may modify it if you
 //   // build a divider which completes in fewer iterations.
 //   // You are not responsible for the (trivial) circuit
 //   // design of the block.
 //   always @(posedge clk)
 //     begin
 //       if      (count == `DIVLEN + 2) done <= #1 1;
 //       else if (done | req) done <= #1 0;	
 //       if (req) count <= #1 0;
 //       else     count <= #1 count+1;
 //     end
 // endmodule
 ///////////
 // clock //
 ///////////
 // Placeholder clock module: it exposes a single output, clk, declared as a
 // logic variable, and contains no statement that drives it.
 module clock(output logic clk);
 endmodule
 //////////
 // testbench //
 //////////
 // Self-checking testbench for the srt unit.
 //
 // Vectors are read from a hex file into Tests; each entry is unpacked into
 // operands a and b and an expected result.  req is pulsed to start an
 // operation; when the DUT raises done the result is checked according to the
 // Int/Sqrt mode, and the next vector is issued.  The run stops when a vector
 // beyond the loaded data is reached (afrac reads back as all x) or on the
 // first failure.
 //
 // The previous revision of this module contained unresolved merge-conflict
 // markers and could not be compiled.  They are resolved here in favor of the
 // square-root configuration, the only side that drives the Sqrt signal the
 // shared checking code reads.  To run the floating-point divide test instead:
 // swap the `define group below, set Sqrt to 1'b0 and load "testvectors".
 module testbench;
  logic              clk;
  logic              req;
  logic              done;
  logic              Int;
  logic              Sqrt;
  logic [63:0]       a, b;
  logic [51:0]       afrac, bfrac;
  logic [10:0]       aExp, bExp;
  logic              asign, bsign;
  logic [51:0]       r;
  logic [63:0]       rInt;
  logic [`DIVLEN-1:0]  Quot;
  // Test parameters
  parameter MEM_SIZE = 40000;
  parameter MEM_WIDTH = 64+64+64+64;
  // INT TEST SIZES
  // `define memrem  63:0 
  // `define memr  127:64
  // `define memb  191:128
  // `define mema  255:192
  // FLOAT TEST SIZES
  // `define memr  63:0 
  // `define memb  127:64
  // `define mema  191:128
  // SQRT TEST SIZES 
  `define memr  63:0 
  `define mema  127:64
  `define memb  191:128
  // Test registers
  logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
  logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
                            // bit field of an array 
  logic [63:0] correctr, nextr, diffn, diffp;
  logic [10:0] rExp;
  logic        rsign;
  integer testnum, errors;
  // Equip Int test or Sqrt test
  assign Int = 1'b0;
  assign Sqrt = 1'b1;
  // Divider
  srt srt(.clk, .Start(req), 
                .Stall(1'b0), .Flush(1'b0), 
                .XExp(aExp), .YExp(bExp), .rExp,
                .XSign(asign), .YSign(bsign), .rsign,
                .SrcXFrac(afrac), .SrcYFrac(bfrac), 
                .SrcA(a), .SrcB(b), .Fmt(2'b00), 
                .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, 
                .Quot, .Rem(), .Flags(), .done);
  // Counter
  // counter counter(clk, req, done);
  // Clock: high for 17 time units, low for 16.
    initial
    forever
      begin
        clk = 1; #17;
        clk = 0; #16;
      end
  // Read test vectors from disk
  initial
    begin
      testnum = 0; 
      errors = 0;
      // Use "testvectors" here for the floating-point divide configuration.
      $readmemh ("sqrttestvectors", Tests);
      // Unpack the first vector and request the first operation.
      Vec = Tests[testnum];
      a = Vec[`mema];
      {asign, aExp, afrac} = a;
      b = Vec[`memb];
      {bsign, bExp, bfrac} = b;
      nextr = Vec[`memr];
      r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
      rInt = Quot;
      req <= #5 1;
    end
  // Apply directed test vectors read from file.
  always @(posedge clk) begin
    // r is the top 52 bits of Quot; rInt is the whole of Quot.
    r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
    rInt = Quot;
    if (done) begin
      if (~Int & ~Sqrt) begin
        // Floating-point divide: sign and exponent must match exactly and
        // the 52-bit fraction must be within 1 of the expected value.
        req <= #5 1;
        diffp = correctr[51:0] - r;
        diffn = r - correctr[51:0];
        if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
          begin
            errors = errors+1;
            $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
            $display("failed\n");
            $stop;
          end
        // An all-x fraction means the vector just applied lies beyond the
        // data loaded from the file, so the run is complete.
        if (afrac === 52'hxxxxxxxxxxxxx)
          begin
            $display("%d Tests completed successfully", testnum);
            $stop;
          end
      end else if (~Sqrt) begin
        // Integer: the full 64-bit result must be within 1 of the expected
        // value.
        req <= #5 1;
        diffp = correctr[63:0] - rInt;
        diffn = rInt - correctr[63:0];
        if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
          begin
            errors = errors+1;
            $display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp);
            $display("failed\n");
            $stop;
          end
        if (afrac === 52'hxxxxxxxxxxxxx)
        begin
          $display("%d Tests completed successfully", testnum);
          $stop;
        end
      end else begin 
        // Square root: only the exponent is compared; the fraction
        // differences are computed for the failure message but not checked.
        req <= #5 1;
        diffp = correctr[51:0] - r;
        diffn = r - correctr[51:0];
        if (rExp !== correctr[62:52]) // exponent check only
          begin
            errors = errors + 1;
            $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
            $display("failed\n");
            $stop;
          end
        if (afrac === 52'hxxxxxxxxxxxxx) begin 
          $display("%d Tests completed successfully", testnum);
          $stop; end 
      end
    end
    if (req) begin
      // The request has been seen: drop it, latch the expected result for
      // the operation now in flight, and stage the next vector.
      req <= #5 0;
      correctr = nextr;
      testnum = testnum+1;
      Vec = Tests[testnum];
      $display("a = %h  b = %h",a,b);
      a = Vec[`mema];
      {asign, aExp, afrac} = a;
      b = Vec[`memb];
      {bsign, bExp, bfrac} = b;
      nextr = Vec[`memr];
    end
  end
 endmodule
--- a/pipelined/srt/testgen.c
+++ b/pipelined/srt/testgen.c
@ -0,0 +1,94 @@
 /* testgen.c */
 /* Written 10/31/96 by David Harris
   This program creates test vectors for mantissa component
   of an IEEE floating point divider. 
   */
 /* #includes */
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 /* Constants */
 #define ENTRIES  17
 #define RANDOM_VECS 500
 /* Prototypes */
 void output(FILE *fptr, double a, double b, double r);
 void printhex(FILE *fptr, double x);
 double random_input(void);
 /* Main */
 /*
  * Write the "testvectors" file: one line per (a, b, a/b) triple, first for
  * every ordered pair taken from a fixed list of directed values, then for
  * RANDOM_VECS pairs of random inputs.
  *
  * Returns 0 on success so that a calling make rule sees a defined exit
  * status; exits with status 1 if the file cannot be opened or closed.
  */
 int main(void)
 {
  FILE *fptr;
  double a, b, r;
  double list[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
 			  1.75, 1.875, 1.99999,
 			  1.1, 1.2, 1.01, 1.001, 1.0001,
 			  1/1.1, 1/1.5, 1/1.25, 1/1.125};
  int i, j;

  if ((fptr = fopen("testvectors","w")) == NULL) {
    fprintf(stderr, "Couldn't write testvectors file\n");
    exit(1);
  }

  /* Directed vectors: every list entry divided by every list entry. */
  for (i=0; i<ENTRIES; i++) {
    b = list[i];
    for (j=0; j<ENTRIES; j++) {
      a = list[j];
      r = a/b;
      output(fptr, a, b, r);
    }
  }

  /* Random vectors. */
  for (i = 0; i< RANDOM_VECS; i++) {
    a = random_input();
    b = random_input();
    r = a/b;
    output(fptr, a, b, r);
  }

  /* fclose flushes buffered data, so a failure here means a short file. */
  if (fclose(fptr) != 0) {
    fprintf(stderr, "Couldn't write testvectors file\n");
    exit(1);
  }
  return 0;
 }
 /* Functions */
 /*
  * Emit one test-vector line to fptr: the printhex encodings of a, b and r,
  * separated by underscores and terminated by a newline.
  */
 void output(FILE *fptr, double a, double b, double r)
 {
  const double fields[3] = { a, b, r };
  int k;

  for (k = 0; k < 3; k++) {
    printhex(fptr, fields[k]);
    /* Underscore between fields, newline after the last one. */
    fputs(k < 2 ? "_" : "\n", fptr);
  }
 }
 void printhex(FILE *fptr, double m)
 {
  int i, val;
  while (m<1) m *= 2;
  while (m>2) m /= 2;
  for (i=0; i<52; i+=4) {
    m = m - floor(m);
    m = m * 16;
    val = (int)(m)%16;
    fprintf(fptr, "%x", val);
  }    
 }    
 /*
  * Return a pseudo-random value in [1, 2), using rand().
  *
  * The result is scaled by RAND_MAX rather than by a hard-coded 32767, so
  * it stays in range on platforms where RAND_MAX is larger than 32767.
  * Adding 1.0 to RAND_MAX promotes it to double first, which avoids integer
  * overflow and keeps the result strictly below 2.
  */
 double random_input(void)
 {
  return 1.0 + rand() / (RAND_MAX + 1.0);
 }
		`@ -0,0 +1,2 @@`
							`verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv`
							`verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv`