diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile new file mode 100644 index 000000000..5d7898b17 --- /dev/null +++ b/pipelined/srt/Makefile @@ -0,0 +1,33 @@ +all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen + +sqrttestgen: sqrttestgen.c + gcc sqrttestgen.c -o sqrttestgen -lm + ./sqrttestgen + +testgen: testgen.c + gcc testgen.c -o testgen -lm + ./testgen + +exptestgen: exptestgen.c + gcc -o exptestgen exptestgen.c -lm + ./exptestgen + +qslc_r4a2: qslc_r4a2.c + gcc qslc_r4a2.c -o qslc_r4a2 -lm + ./qslc_r4a2 > qslc_r4a2.sv + +qslc_r4a2b: qslc_r4a2b.c + gcc qslc_r4a2b.c -o qslc_r4a2b -lm + ./qslc_r4a2b > qslc_r4a2b.tv + +qslc_sqrt_r4a2: qslc_sqrt_r4a2.c + gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm + ./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv + +inttestgen: inttestgen.c + gcc -lm -o inttestgen inttestgen.c + ./inttestgen + +clean: + rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen + diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c new file mode 100644 index 000000000..61fe74aa4 --- /dev/null +++ b/pipelined/srt/exptestgen.c @@ -0,0 +1,127 @@ +/* testgen.c */ + +/* Written 2/19/2022 by David Harris + + This program creates test vectors for mantissa and exponent components + of an IEEE floating point divider. + Builds upon program that creates test vectors for mantissa component only. + */ + +/* #includes */ + +#include +#include +#include + +/* Constants */ + +#define ENTRIES 17 +#define RANDOM_VECS 500 +// #define BIAS 1023 // Bias is for double precision + +/* Prototypes */ + +void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac); +void printhex(FILE *fptr, double x); +double random_input(void); +double random_input_e(void); + +/* Main */ + +void main(void) +{ + FILE *fptr; + // aExp & bExp are exponents + // aFrac & bFrac are mantissas + // rFrac is result of fractional divsion + // rExp is result of exponent division + double aFrac, bFrac, rFrac; + int aExp, bExp, rExp; + int aSign, bSign, rSign; + double mantissa[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, + 1.75, 1.875, 1.99999, + 1.1, 1.2, 1.01, 1.001, 1.0001, + 1/1.1, 1/1.5, 1/1.25, 1/1.125}; + int exponent[ENTRIES] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + int i, j; + int bias = 1023; + + if ((fptr = fopen("testvectors","w")) == NULL) { + fprintf(stderr, "Couldn't write testvectors file\n"); + exit(1); + } + + for (i=0; i2) m /= 2; + for (i=0; i +#include +#include + +/* Constants */ + +#define ENTRIES 10 +#define RANDOM_VECS 500 + +/* Prototypes */ + +void output(FILE *fptr, long a, long b, long r, long rem); +void printhex(FILE *fptr, long x); +double random_input(void); + +/* Main */ + +void main(void) +{ + FILE *fptr; + long a, b, r, rem; + long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255}; + int i, j; + + if ((fptr = fopen("inttestvectors","w")) == NULL) { + fprintf(stderr, "Couldn't write testvectors file\n"); + exit(1); + } + + for (i=0; i +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + printf("\tcase({D[5:3],Wmsbs})\n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf("\t\t10'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 12) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -4) + printf("0000"); + else if ((pla.tot) >= -13) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 14) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -15) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 15) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -16) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 16) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -18) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 18) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -22) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -24) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf("\tendcase\n"); + +} diff --git a/pipelined/srt/qslc_r4a2b b/pipelined/srt/qslc_r4a2b new file mode 100755 index 000000000..f719bbf47 Binary files /dev/null and b/pipelined/srt/qslc_r4a2b differ diff --git a/pipelined/srt/qslc_r4a2b.c b/pipelined/srt/qslc_r4a2b.c new file mode 100644 index 000000000..94a3a4cd4 --- /dev/null +++ b/pipelined/srt/qslc_r4a2b.c @@ -0,0 +1,190 @@ +/* + Program: qslc_r4a2.c + Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) + User: James E. Stine + +*/ + +#include +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 12) + printf("8"); + else if ((pla.tot) >= 4) + printf("4"); + else if ((pla.tot) >= -4) + printf("0"); + else if ((pla.tot) >= -13) + printf("2"); + else + printf("1"); + break; + case 1: + if ((pla.tot) >= 14) + printf("8"); + else if ((pla.tot) >= 4) + printf("4"); + else if ((pla.tot) >= -6) + printf("0"); + else if ((pla.tot) >= -15) + printf("2"); + else + printf("1"); + break; + case 2: + if ((pla.tot) >= 15) + printf("8"); + else if ((pla.tot) >= 4) + printf("4"); + else if ((pla.tot) >= -6) + printf("0"); + else if ((pla.tot) >= -16) + printf("2"); + else + printf("1"); + break; + case 3: + if ((pla.tot) >= 16) + printf("8"); + else if ((pla.tot) >= 4) + printf("4"); + else if ((pla.tot) >= -6) + printf("0"); + else if ((pla.tot) >= -18) + printf("2"); + else + printf("1"); + break; + case 4: + if ((pla.tot) >= 18) + printf("8"); + else if ((pla.tot) >= 6) + printf("4"); + else if ((pla.tot) >= -8) + printf("0"); + else if ((pla.tot) >= -20) + printf("2"); + else + printf("1"); + break; + case 5: + if ((pla.tot) >= 20) + printf("8"); + else if ((pla.tot) >= 6) + printf("4"); + else if ((pla.tot) >= -8) + printf("0"); + else if ((pla.tot) >= -20) + printf("2"); + else + printf("1"); + break; + case 6: + if ((pla.tot) >= 20) + printf("8"); + else if ((pla.tot) >= 8) + printf("4"); + else if ((pla.tot) >= -8) + printf("0"); + else if ((pla.tot) >= -22) + printf("2"); + else + printf("1"); + break; + case 7: + if ((pla.tot) >= 24) + printf("8"); + else if ((pla.tot) >= 8) + printf("4"); + else if ((pla.tot) >= -8) + printf("0"); + else if ((pla.tot) >= -24) + printf("2"); + else + printf("1"); + break; + default: printf ("X"); + + } + + printf("\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + +} diff --git a/pipelined/srt/qslc_r4a2b.tv b/pipelined/srt/qslc_r4a2b.tv new file mode 100644 index 000000000..b92d81e8e --- /dev/null +++ b/pipelined/srt/qslc_r4a2b.tv @@ -0,0 +1,1024 @@ +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 new file mode 100755 index 000000000..5cff70cdf Binary files /dev/null and b/pipelined/srt/qslc_sqrt_r4a2 differ diff --git a/pipelined/srt/qslc_sqrt_r4a2.c b/pipelined/srt/qslc_sqrt_r4a2.c new file mode 100644 index 000000000..252293cc0 --- /dev/null +++ b/pipelined/srt/qslc_sqrt_r4a2.c @@ -0,0 +1,198 @@ +/* + Program: qslc_r4a2.c + Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) + User: James E. Stine + +*/ + +#include +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + printf("\tcase({D[5:3],Wmsbs})\n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf("\t\t11'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -26) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 28) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -10) + printf("0000"); + else if ((pla.tot) >= -28) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -32) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -34) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 36) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -36) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -40) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -44) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 44) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -46) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf("\tendcase\n"); + +} diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv new file mode 100644 index 000000000..805dbbaeb --- /dev/null +++ b/pipelined/srt/qslc_sqrt_r4a2.sv @@ -0,0 +1,1026 @@ + case({D[5:3],Wmsbs}) + 11'b000_0000000: q = 4'b0000; + 11'b000_0000001: q = 4'b0000; + 11'b000_0000010: q = 4'b0000; + 11'b000_0000011: q = 4'b0000; + 11'b000_0000100: q = 4'b0000; + 11'b000_0000101: q = 4'b0000; + 11'b000_0000110: q = 4'b0000; + 11'b000_0000111: q = 4'b0000; + 11'b000_0001000: q = 4'b0100; + 11'b000_0001001: q = 4'b0100; + 11'b000_0001010: q = 4'b0100; + 11'b000_0001011: q = 4'b0100; + 11'b000_0001100: q = 4'b0100; + 11'b000_0001101: q = 4'b0100; + 11'b000_0001110: q = 4'b0100; + 11'b000_0001111: q = 4'b0100; + 11'b000_0010000: q = 4'b0100; + 11'b000_0010001: q = 4'b0100; + 11'b000_0010010: q = 4'b0100; + 11'b000_0010011: q = 4'b0100; + 11'b000_0010100: q = 4'b0100; + 11'b000_0010101: q = 4'b0100; + 11'b000_0010110: q = 4'b0100; + 11'b000_0010111: q = 4'b0100; + 11'b000_0011000: q = 4'b1000; + 11'b000_0011001: q = 4'b1000; + 11'b000_0011010: q = 4'b1000; + 11'b000_0011011: q = 4'b1000; + 11'b000_0011100: q = 4'b1000; + 11'b000_0011101: q = 4'b1000; + 11'b000_0011110: q = 4'b1000; + 11'b000_0011111: q = 4'b1000; + 11'b000_0100000: q = 4'b1000; + 11'b000_0100001: q = 4'b1000; + 11'b000_0100010: q = 4'b1000; + 11'b000_0100011: q = 4'b1000; + 11'b000_0100100: q = 4'b1000; + 11'b000_0100101: q = 4'b1000; + 11'b000_0100110: q = 4'b1000; + 11'b000_0100111: q = 4'b1000; + 11'b000_0101000: q = 4'b1000; + 11'b000_0101001: q = 4'b1000; + 11'b000_0101010: q = 4'b1000; + 11'b000_0101011: q = 4'b1000; + 11'b000_0101100: q = 4'b1000; + 11'b000_0101101: q = 4'b1000; + 11'b000_0101110: q = 4'b1000; + 11'b000_0101111: q = 4'b1000; + 11'b000_0110000: q = 4'b1000; + 11'b000_0110001: q = 4'b1000; + 11'b000_0110010: q = 4'b1000; + 11'b000_0110011: q = 4'b1000; + 11'b000_0110100: q = 4'b1000; + 11'b000_0110101: q = 4'b1000; + 11'b000_0110110: q = 4'b1000; + 11'b000_0110111: q = 4'b1000; + 11'b000_0111000: q = 4'b1000; + 11'b000_0111001: q = 4'b1000; + 11'b000_0111010: q = 4'b1000; + 11'b000_0111011: q = 4'b1000; + 11'b000_0111100: q = 4'b1000; + 11'b000_0111101: q = 4'b1000; + 11'b000_0111110: q = 4'b1000; + 11'b000_0111111: q = 4'b1000; + 11'b000_1000000: q = 4'b0001; + 11'b000_1000001: q = 4'b0001; + 11'b000_1000010: q = 4'b0001; + 11'b000_1000011: q = 4'b0001; + 11'b000_1000100: q = 4'b0001; + 11'b000_1000101: q = 4'b0001; + 11'b000_1000110: q = 4'b0001; + 11'b000_1000111: q = 4'b0001; + 11'b000_1001000: q = 4'b0001; + 11'b000_1001001: q = 4'b0001; + 11'b000_1001010: q = 4'b0001; + 11'b000_1001011: q = 4'b0001; + 11'b000_1001100: q = 4'b0001; + 11'b000_1001101: q = 4'b0001; + 11'b000_1001110: q = 4'b0001; + 11'b000_1001111: q = 4'b0001; + 11'b000_1010000: q = 4'b0001; + 11'b000_1010001: q = 4'b0001; + 11'b000_1010010: q = 4'b0001; + 11'b000_1010011: q = 4'b0001; + 11'b000_1010100: q = 4'b0001; + 11'b000_1010101: q = 4'b0001; + 11'b000_1010110: q = 4'b0001; + 11'b000_1010111: q = 4'b0001; + 11'b000_1011000: q = 4'b0001; + 11'b000_1011001: q = 4'b0001; + 11'b000_1011010: q = 4'b0001; + 11'b000_1011011: q = 4'b0001; + 11'b000_1011100: q = 4'b0001; + 11'b000_1011101: q = 4'b0001; + 11'b000_1011110: q = 4'b0001; + 11'b000_1011111: q = 4'b0001; + 11'b000_1100000: q = 4'b0001; + 11'b000_1100001: q = 4'b0001; + 11'b000_1100010: q = 4'b0001; + 11'b000_1100011: q = 4'b0001; + 11'b000_1100100: q = 4'b0001; + 11'b000_1100101: q = 4'b0001; + 11'b000_1100110: q = 4'b0010; + 11'b000_1100111: q = 4'b0010; + 11'b000_1101000: q = 4'b0010; + 11'b000_1101001: q = 4'b0010; + 11'b000_1101010: q = 4'b0010; + 11'b000_1101011: q = 4'b0010; + 11'b000_1101100: q = 4'b0010; + 11'b000_1101101: q = 4'b0010; + 11'b000_1101110: q = 4'b0010; + 11'b000_1101111: q = 4'b0010; + 11'b000_1110000: q = 4'b0010; + 11'b000_1110001: q = 4'b0010; + 11'b000_1110010: q = 4'b0010; + 11'b000_1110011: q = 4'b0010; + 11'b000_1110100: q = 4'b0010; + 11'b000_1110101: q = 4'b0010; + 11'b000_1110110: q = 4'b0010; + 11'b000_1110111: q = 4'b0010; + 11'b000_1111000: q = 4'b0000; + 11'b000_1111001: q = 4'b0000; + 11'b000_1111010: q = 4'b0000; + 11'b000_1111011: q = 4'b0000; + 11'b000_1111100: q = 4'b0000; + 11'b000_1111101: q = 4'b0000; + 11'b000_1111110: q = 4'b0000; + 11'b000_1111111: q = 4'b0000; + 11'b001_0000000: q = 4'b0000; + 11'b001_0000001: q = 4'b0000; + 11'b001_0000010: q = 4'b0000; + 11'b001_0000011: q = 4'b0000; + 11'b001_0000100: q = 4'b0000; + 11'b001_0000101: q = 4'b0000; + 11'b001_0000110: q = 4'b0000; + 11'b001_0000111: q = 4'b0000; + 11'b001_0001000: q = 4'b0100; + 11'b001_0001001: q = 4'b0100; + 11'b001_0001010: q = 4'b0100; + 11'b001_0001011: q = 4'b0100; + 11'b001_0001100: q = 4'b0100; + 11'b001_0001101: q = 4'b0100; + 11'b001_0001110: q = 4'b0100; + 11'b001_0001111: q = 4'b0100; + 11'b001_0010000: q = 4'b0100; + 11'b001_0010001: q = 4'b0100; + 11'b001_0010010: q = 4'b0100; + 11'b001_0010011: q = 4'b0100; + 11'b001_0010100: q = 4'b0100; + 11'b001_0010101: q = 4'b0100; + 11'b001_0010110: q = 4'b0100; + 11'b001_0010111: q = 4'b0100; + 11'b001_0011000: q = 4'b0100; + 11'b001_0011001: q = 4'b0100; + 11'b001_0011010: q = 4'b0100; + 11'b001_0011011: q = 4'b0100; + 11'b001_0011100: q = 4'b1000; + 11'b001_0011101: q = 4'b1000; + 11'b001_0011110: q = 4'b1000; + 11'b001_0011111: q = 4'b1000; + 11'b001_0100000: q = 4'b1000; + 11'b001_0100001: q = 4'b1000; + 11'b001_0100010: q = 4'b1000; + 11'b001_0100011: q = 4'b1000; + 11'b001_0100100: q = 4'b1000; + 11'b001_0100101: q = 4'b1000; + 11'b001_0100110: q = 4'b1000; + 11'b001_0100111: q = 4'b1000; + 11'b001_0101000: q = 4'b1000; + 11'b001_0101001: q = 4'b1000; + 11'b001_0101010: q = 4'b1000; + 11'b001_0101011: q = 4'b1000; + 11'b001_0101100: q = 4'b1000; + 11'b001_0101101: q = 4'b1000; + 11'b001_0101110: q = 4'b1000; + 11'b001_0101111: q = 4'b1000; + 11'b001_0110000: q = 4'b1000; + 11'b001_0110001: q = 4'b1000; + 11'b001_0110010: q = 4'b1000; + 11'b001_0110011: q = 4'b1000; + 11'b001_0110100: q = 4'b1000; + 11'b001_0110101: q = 4'b1000; + 11'b001_0110110: q = 4'b1000; + 11'b001_0110111: q = 4'b1000; + 11'b001_0111000: q = 4'b1000; + 11'b001_0111001: q = 4'b1000; + 11'b001_0111010: q = 4'b1000; + 11'b001_0111011: q = 4'b1000; + 11'b001_0111100: q = 4'b1000; + 11'b001_0111101: q = 4'b1000; + 11'b001_0111110: q = 4'b1000; + 11'b001_0111111: q = 4'b1000; + 11'b001_1000000: q = 4'b0001; + 11'b001_1000001: q = 4'b0001; + 11'b001_1000010: q = 4'b0001; + 11'b001_1000011: q = 4'b0001; + 11'b001_1000100: q = 4'b0001; + 11'b001_1000101: q = 4'b0001; + 11'b001_1000110: q = 4'b0001; + 11'b001_1000111: q = 4'b0001; + 11'b001_1001000: q = 4'b0001; + 11'b001_1001001: q = 4'b0001; + 11'b001_1001010: q = 4'b0001; + 11'b001_1001011: q = 4'b0001; + 11'b001_1001100: q = 4'b0001; + 11'b001_1001101: q = 4'b0001; + 11'b001_1001110: q = 4'b0001; + 11'b001_1001111: q = 4'b0001; + 11'b001_1010000: q = 4'b0001; + 11'b001_1010001: q = 4'b0001; + 11'b001_1010010: q = 4'b0001; + 11'b001_1010011: q = 4'b0001; + 11'b001_1010100: q = 4'b0001; + 11'b001_1010101: q = 4'b0001; + 11'b001_1010110: q = 4'b0001; + 11'b001_1010111: q = 4'b0001; + 11'b001_1011000: q = 4'b0001; + 11'b001_1011001: q = 4'b0001; + 11'b001_1011010: q = 4'b0001; + 11'b001_1011011: q = 4'b0001; + 11'b001_1011100: q = 4'b0001; + 11'b001_1011101: q = 4'b0001; + 11'b001_1011110: q = 4'b0001; + 11'b001_1011111: q = 4'b0001; + 11'b001_1100000: q = 4'b0001; + 11'b001_1100001: q = 4'b0001; + 11'b001_1100010: q = 4'b0001; + 11'b001_1100011: q = 4'b0001; + 11'b001_1100100: q = 4'b0010; + 11'b001_1100101: q = 4'b0010; + 11'b001_1100110: q = 4'b0010; + 11'b001_1100111: q = 4'b0010; + 11'b001_1101000: q = 4'b0010; + 11'b001_1101001: q = 4'b0010; + 11'b001_1101010: q = 4'b0010; + 11'b001_1101011: q = 4'b0010; + 11'b001_1101100: q = 4'b0010; + 11'b001_1101101: q = 4'b0010; + 11'b001_1101110: q = 4'b0010; + 11'b001_1101111: q = 4'b0010; + 11'b001_1110000: q = 4'b0010; + 11'b001_1110001: q = 4'b0010; + 11'b001_1110010: q = 4'b0010; + 11'b001_1110011: q = 4'b0010; + 11'b001_1110100: q = 4'b0010; + 11'b001_1110101: q = 4'b0010; + 11'b001_1110110: q = 4'b0000; + 11'b001_1110111: q = 4'b0000; + 11'b001_1111000: q = 4'b0000; + 11'b001_1111001: q = 4'b0000; + 11'b001_1111010: q = 4'b0000; + 11'b001_1111011: q = 4'b0000; + 11'b001_1111100: q = 4'b0000; + 11'b001_1111101: q = 4'b0000; + 11'b001_1111110: q = 4'b0000; + 11'b001_1111111: q = 4'b0000; + 11'b010_0000000: q = 4'b0000; + 11'b010_0000001: q = 4'b0000; + 11'b010_0000010: q = 4'b0000; + 11'b010_0000011: q = 4'b0000; + 11'b010_0000100: q = 4'b0000; + 11'b010_0000101: q = 4'b0000; + 11'b010_0000110: q = 4'b0000; + 11'b010_0000111: q = 4'b0000; + 11'b010_0001000: q = 4'b0100; + 11'b010_0001001: q = 4'b0100; + 11'b010_0001010: q = 4'b0100; + 11'b010_0001011: q = 4'b0100; + 11'b010_0001100: q = 4'b0100; + 11'b010_0001101: q = 4'b0100; + 11'b010_0001110: q = 4'b0100; + 11'b010_0001111: q = 4'b0100; + 11'b010_0010000: q = 4'b0100; + 11'b010_0010001: q = 4'b0100; + 11'b010_0010010: q = 4'b0100; + 11'b010_0010011: q = 4'b0100; + 11'b010_0010100: q = 4'b0100; + 11'b010_0010101: q = 4'b0100; + 11'b010_0010110: q = 4'b0100; + 11'b010_0010111: q = 4'b0100; + 11'b010_0011000: q = 4'b0100; + 11'b010_0011001: q = 4'b0100; + 11'b010_0011010: q = 4'b0100; + 11'b010_0011011: q = 4'b0100; + 11'b010_0011100: q = 4'b0100; + 11'b010_0011101: q = 4'b0100; + 11'b010_0011110: q = 4'b0100; + 11'b010_0011111: q = 4'b0100; + 11'b010_0100000: q = 4'b1000; + 11'b010_0100001: q = 4'b1000; + 11'b010_0100010: q = 4'b1000; + 11'b010_0100011: q = 4'b1000; + 11'b010_0100100: q = 4'b1000; + 11'b010_0100101: q = 4'b1000; + 11'b010_0100110: q = 4'b1000; + 11'b010_0100111: q = 4'b1000; + 11'b010_0101000: q = 4'b1000; + 11'b010_0101001: q = 4'b1000; + 11'b010_0101010: q = 4'b1000; + 11'b010_0101011: q = 4'b1000; + 11'b010_0101100: q = 4'b1000; + 11'b010_0101101: q = 4'b1000; + 11'b010_0101110: q = 4'b1000; + 11'b010_0101111: q = 4'b1000; + 11'b010_0110000: q = 4'b1000; + 11'b010_0110001: q = 4'b1000; + 11'b010_0110010: q = 4'b1000; + 11'b010_0110011: q = 4'b1000; + 11'b010_0110100: q = 4'b1000; + 11'b010_0110101: q = 4'b1000; + 11'b010_0110110: q = 4'b1000; + 11'b010_0110111: q = 4'b1000; + 11'b010_0111000: q = 4'b1000; + 11'b010_0111001: q = 4'b1000; + 11'b010_0111010: q = 4'b1000; + 11'b010_0111011: q = 4'b1000; + 11'b010_0111100: q = 4'b1000; + 11'b010_0111101: q = 4'b1000; + 11'b010_0111110: q = 4'b1000; + 11'b010_0111111: q = 4'b1000; + 11'b010_1000000: q = 4'b0001; + 11'b010_1000001: q = 4'b0001; + 11'b010_1000010: q = 4'b0001; + 11'b010_1000011: q = 4'b0001; + 11'b010_1000100: q = 4'b0001; + 11'b010_1000101: q = 4'b0001; + 11'b010_1000110: q = 4'b0001; + 11'b010_1000111: q = 4'b0001; + 11'b010_1001000: q = 4'b0001; + 11'b010_1001001: q = 4'b0001; + 11'b010_1001010: q = 4'b0001; + 11'b010_1001011: q = 4'b0001; + 11'b010_1001100: q = 4'b0001; + 11'b010_1001101: q = 4'b0001; + 11'b010_1001110: q = 4'b0001; + 11'b010_1001111: q = 4'b0001; + 11'b010_1010000: q = 4'b0001; + 11'b010_1010001: q = 4'b0001; + 11'b010_1010010: q = 4'b0001; + 11'b010_1010011: q = 4'b0001; + 11'b010_1010100: q = 4'b0001; + 11'b010_1010101: q = 4'b0001; + 11'b010_1010110: q = 4'b0001; + 11'b010_1010111: q = 4'b0001; + 11'b010_1011000: q = 4'b0001; + 11'b010_1011001: q = 4'b0001; + 11'b010_1011010: q = 4'b0001; + 11'b010_1011011: q = 4'b0001; + 11'b010_1011100: q = 4'b0001; + 11'b010_1011101: q = 4'b0001; + 11'b010_1011110: q = 4'b0001; + 11'b010_1011111: q = 4'b0001; + 11'b010_1100000: q = 4'b0010; + 11'b010_1100001: q = 4'b0010; + 11'b010_1100010: q = 4'b0010; + 11'b010_1100011: q = 4'b0010; + 11'b010_1100100: q = 4'b0010; + 11'b010_1100101: q = 4'b0010; + 11'b010_1100110: q = 4'b0010; + 11'b010_1100111: q = 4'b0010; + 11'b010_1101000: q = 4'b0010; + 11'b010_1101001: q = 4'b0010; + 11'b010_1101010: q = 4'b0010; + 11'b010_1101011: q = 4'b0010; + 11'b010_1101100: q = 4'b0010; + 11'b010_1101101: q = 4'b0010; + 11'b010_1101110: q = 4'b0010; + 11'b010_1101111: q = 4'b0010; + 11'b010_1110000: q = 4'b0010; + 11'b010_1110001: q = 4'b0010; + 11'b010_1110010: q = 4'b0010; + 11'b010_1110011: q = 4'b0010; + 11'b010_1110100: q = 4'b0000; + 11'b010_1110101: q = 4'b0000; + 11'b010_1110110: q = 4'b0000; + 11'b010_1110111: q = 4'b0000; + 11'b010_1111000: q = 4'b0000; + 11'b010_1111001: q = 4'b0000; + 11'b010_1111010: q = 4'b0000; + 11'b010_1111011: q = 4'b0000; + 11'b010_1111100: q = 4'b0000; + 11'b010_1111101: q = 4'b0000; + 11'b010_1111110: q = 4'b0000; + 11'b010_1111111: q = 4'b0000; + 11'b011_0000000: q = 4'b0000; + 11'b011_0000001: q = 4'b0000; + 11'b011_0000010: q = 4'b0000; + 11'b011_0000011: q = 4'b0000; + 11'b011_0000100: q = 4'b0000; + 11'b011_0000101: q = 4'b0000; + 11'b011_0000110: q = 4'b0000; + 11'b011_0000111: q = 4'b0000; + 11'b011_0001000: q = 4'b0100; + 11'b011_0001001: q = 4'b0100; + 11'b011_0001010: q = 4'b0100; + 11'b011_0001011: q = 4'b0100; + 11'b011_0001100: q = 4'b0100; + 11'b011_0001101: q = 4'b0100; + 11'b011_0001110: q = 4'b0100; + 11'b011_0001111: q = 4'b0100; + 11'b011_0010000: q = 4'b0100; + 11'b011_0010001: q = 4'b0100; + 11'b011_0010010: q = 4'b0100; + 11'b011_0010011: q = 4'b0100; + 11'b011_0010100: q = 4'b0100; + 11'b011_0010101: q = 4'b0100; + 11'b011_0010110: q = 4'b0100; + 11'b011_0010111: q = 4'b0100; + 11'b011_0011000: q = 4'b0100; + 11'b011_0011001: q = 4'b0100; + 11'b011_0011010: q = 4'b0100; + 11'b011_0011011: q = 4'b0100; + 11'b011_0011100: q = 4'b0100; + 11'b011_0011101: q = 4'b0100; + 11'b011_0011110: q = 4'b0100; + 11'b011_0011111: q = 4'b0100; + 11'b011_0100000: q = 4'b1000; + 11'b011_0100001: q = 4'b1000; + 11'b011_0100010: q = 4'b1000; + 11'b011_0100011: q = 4'b1000; + 11'b011_0100100: q = 4'b1000; + 11'b011_0100101: q = 4'b1000; + 11'b011_0100110: q = 4'b1000; + 11'b011_0100111: q = 4'b1000; + 11'b011_0101000: q = 4'b1000; + 11'b011_0101001: q = 4'b1000; + 11'b011_0101010: q = 4'b1000; + 11'b011_0101011: q = 4'b1000; + 11'b011_0101100: q = 4'b1000; + 11'b011_0101101: q = 4'b1000; + 11'b011_0101110: q = 4'b1000; + 11'b011_0101111: q = 4'b1000; + 11'b011_0110000: q = 4'b1000; + 11'b011_0110001: q = 4'b1000; + 11'b011_0110010: q = 4'b1000; + 11'b011_0110011: q = 4'b1000; + 11'b011_0110100: q = 4'b1000; + 11'b011_0110101: q = 4'b1000; + 11'b011_0110110: q = 4'b1000; + 11'b011_0110111: q = 4'b1000; + 11'b011_0111000: q = 4'b1000; + 11'b011_0111001: q = 4'b1000; + 11'b011_0111010: q = 4'b1000; + 11'b011_0111011: q = 4'b1000; + 11'b011_0111100: q = 4'b1000; + 11'b011_0111101: q = 4'b1000; + 11'b011_0111110: q = 4'b1000; + 11'b011_0111111: q = 4'b1000; + 11'b011_1000000: q = 4'b0001; + 11'b011_1000001: q = 4'b0001; + 11'b011_1000010: q = 4'b0001; + 11'b011_1000011: q = 4'b0001; + 11'b011_1000100: q = 4'b0001; + 11'b011_1000101: q = 4'b0001; + 11'b011_1000110: q = 4'b0001; + 11'b011_1000111: q = 4'b0001; + 11'b011_1001000: q = 4'b0001; + 11'b011_1001001: q = 4'b0001; + 11'b011_1001010: q = 4'b0001; + 11'b011_1001011: q = 4'b0001; + 11'b011_1001100: q = 4'b0001; + 11'b011_1001101: q = 4'b0001; + 11'b011_1001110: q = 4'b0001; + 11'b011_1001111: q = 4'b0001; + 11'b011_1010000: q = 4'b0001; + 11'b011_1010001: q = 4'b0001; + 11'b011_1010010: q = 4'b0001; + 11'b011_1010011: q = 4'b0001; + 11'b011_1010100: q = 4'b0001; + 11'b011_1010101: q = 4'b0001; + 11'b011_1010110: q = 4'b0001; + 11'b011_1010111: q = 4'b0001; + 11'b011_1011000: q = 4'b0001; + 11'b011_1011001: q = 4'b0001; + 11'b011_1011010: q = 4'b0001; + 11'b011_1011011: q = 4'b0001; + 11'b011_1011100: q = 4'b0001; + 11'b011_1011101: q = 4'b0001; + 11'b011_1011110: q = 4'b0010; + 11'b011_1011111: q = 4'b0010; + 11'b011_1100000: q = 4'b0010; + 11'b011_1100001: q = 4'b0010; + 11'b011_1100010: q = 4'b0010; + 11'b011_1100011: q = 4'b0010; + 11'b011_1100100: q = 4'b0010; + 11'b011_1100101: q = 4'b0010; + 11'b011_1100110: q = 4'b0010; + 11'b011_1100111: q = 4'b0010; + 11'b011_1101000: q = 4'b0010; + 11'b011_1101001: q = 4'b0010; + 11'b011_1101010: q = 4'b0010; + 11'b011_1101011: q = 4'b0010; + 11'b011_1101100: q = 4'b0010; + 11'b011_1101101: q = 4'b0010; + 11'b011_1101110: q = 4'b0010; + 11'b011_1101111: q = 4'b0010; + 11'b011_1110000: q = 4'b0010; + 11'b011_1110001: q = 4'b0010; + 11'b011_1110010: q = 4'b0010; + 11'b011_1110011: q = 4'b0010; + 11'b011_1110100: q = 4'b0000; + 11'b011_1110101: q = 4'b0000; + 11'b011_1110110: q = 4'b0000; + 11'b011_1110111: q = 4'b0000; + 11'b011_1111000: q = 4'b0000; + 11'b011_1111001: q = 4'b0000; + 11'b011_1111010: q = 4'b0000; + 11'b011_1111011: q = 4'b0000; + 11'b011_1111100: q = 4'b0000; + 11'b011_1111101: q = 4'b0000; + 11'b011_1111110: q = 4'b0000; + 11'b011_1111111: q = 4'b0000; + 11'b100_0000000: q = 4'b0000; + 11'b100_0000001: q = 4'b0000; + 11'b100_0000010: q = 4'b0000; + 11'b100_0000011: q = 4'b0000; + 11'b100_0000100: q = 4'b0000; + 11'b100_0000101: q = 4'b0000; + 11'b100_0000110: q = 4'b0000; + 11'b100_0000111: q = 4'b0000; + 11'b100_0001000: q = 4'b0000; + 11'b100_0001001: q = 4'b0000; + 11'b100_0001010: q = 4'b0000; + 11'b100_0001011: q = 4'b0000; + 11'b100_0001100: q = 4'b0100; + 11'b100_0001101: q = 4'b0100; + 11'b100_0001110: q = 4'b0100; + 11'b100_0001111: q = 4'b0100; + 11'b100_0010000: q = 4'b0100; + 11'b100_0010001: q = 4'b0100; + 11'b100_0010010: q = 4'b0100; + 11'b100_0010011: q = 4'b0100; + 11'b100_0010100: q = 4'b0100; + 11'b100_0010101: q = 4'b0100; + 11'b100_0010110: q = 4'b0100; + 11'b100_0010111: q = 4'b0100; + 11'b100_0011000: q = 4'b0100; + 11'b100_0011001: q = 4'b0100; + 11'b100_0011010: q = 4'b0100; + 11'b100_0011011: q = 4'b0100; + 11'b100_0011100: q = 4'b0100; + 11'b100_0011101: q = 4'b0100; + 11'b100_0011110: q = 4'b0100; + 11'b100_0011111: q = 4'b0100; + 11'b100_0100000: q = 4'b0100; + 11'b100_0100001: q = 4'b0100; + 11'b100_0100010: q = 4'b0100; + 11'b100_0100011: q = 4'b0100; + 11'b100_0100100: q = 4'b1000; + 11'b100_0100101: q = 4'b1000; + 11'b100_0100110: q = 4'b1000; + 11'b100_0100111: q = 4'b1000; + 11'b100_0101000: q = 4'b1000; + 11'b100_0101001: q = 4'b1000; + 11'b100_0101010: q = 4'b1000; + 11'b100_0101011: q = 4'b1000; + 11'b100_0101100: q = 4'b1000; + 11'b100_0101101: q = 4'b1000; + 11'b100_0101110: q = 4'b1000; + 11'b100_0101111: q = 4'b1000; + 11'b100_0110000: q = 4'b1000; + 11'b100_0110001: q = 4'b1000; + 11'b100_0110010: q = 4'b1000; + 11'b100_0110011: q = 4'b1000; + 11'b100_0110100: q = 4'b1000; + 11'b100_0110101: q = 4'b1000; + 11'b100_0110110: q = 4'b1000; + 11'b100_0110111: q = 4'b1000; + 11'b100_0111000: q = 4'b1000; + 11'b100_0111001: q = 4'b1000; + 11'b100_0111010: q = 4'b1000; + 11'b100_0111011: q = 4'b1000; + 11'b100_0111100: q = 4'b1000; + 11'b100_0111101: q = 4'b1000; + 11'b100_0111110: q = 4'b1000; + 11'b100_0111111: q = 4'b1000; + 11'b100_1000000: q = 4'b0001; + 11'b100_1000001: q = 4'b0001; + 11'b100_1000010: q = 4'b0001; + 11'b100_1000011: q = 4'b0001; + 11'b100_1000100: q = 4'b0001; + 11'b100_1000101: q = 4'b0001; + 11'b100_1000110: q = 4'b0001; + 11'b100_1000111: q = 4'b0001; + 11'b100_1001000: q = 4'b0001; + 11'b100_1001001: q = 4'b0001; + 11'b100_1001010: q = 4'b0001; + 11'b100_1001011: q = 4'b0001; + 11'b100_1001100: q = 4'b0001; + 11'b100_1001101: q = 4'b0001; + 11'b100_1001110: q = 4'b0001; + 11'b100_1001111: q = 4'b0001; + 11'b100_1010000: q = 4'b0001; + 11'b100_1010001: q = 4'b0001; + 11'b100_1010010: q = 4'b0001; + 11'b100_1010011: q = 4'b0001; + 11'b100_1010100: q = 4'b0001; + 11'b100_1010101: q = 4'b0001; + 11'b100_1010110: q = 4'b0001; + 11'b100_1010111: q = 4'b0001; + 11'b100_1011000: q = 4'b0001; + 11'b100_1011001: q = 4'b0001; + 11'b100_1011010: q = 4'b0001; + 11'b100_1011011: q = 4'b0001; + 11'b100_1011100: q = 4'b0010; + 11'b100_1011101: q = 4'b0010; + 11'b100_1011110: q = 4'b0010; + 11'b100_1011111: q = 4'b0010; + 11'b100_1100000: q = 4'b0010; + 11'b100_1100001: q = 4'b0010; + 11'b100_1100010: q = 4'b0010; + 11'b100_1100011: q = 4'b0010; + 11'b100_1100100: q = 4'b0010; + 11'b100_1100101: q = 4'b0010; + 11'b100_1100110: q = 4'b0010; + 11'b100_1100111: q = 4'b0010; + 11'b100_1101000: q = 4'b0010; + 11'b100_1101001: q = 4'b0010; + 11'b100_1101010: q = 4'b0010; + 11'b100_1101011: q = 4'b0010; + 11'b100_1101100: q = 4'b0010; + 11'b100_1101101: q = 4'b0010; + 11'b100_1101110: q = 4'b0010; + 11'b100_1101111: q = 4'b0010; + 11'b100_1110000: q = 4'b0010; + 11'b100_1110001: q = 4'b0010; + 11'b100_1110010: q = 4'b0010; + 11'b100_1110011: q = 4'b0010; + 11'b100_1110100: q = 4'b0000; + 11'b100_1110101: q = 4'b0000; + 11'b100_1110110: q = 4'b0000; + 11'b100_1110111: q = 4'b0000; + 11'b100_1111000: q = 4'b0000; + 11'b100_1111001: q = 4'b0000; + 11'b100_1111010: q = 4'b0000; + 11'b100_1111011: q = 4'b0000; + 11'b100_1111100: q = 4'b0000; + 11'b100_1111101: q = 4'b0000; + 11'b100_1111110: q = 4'b0000; + 11'b100_1111111: q = 4'b0000; + 11'b101_0000000: q = 4'b0000; + 11'b101_0000001: q = 4'b0000; + 11'b101_0000010: q = 4'b0000; + 11'b101_0000011: q = 4'b0000; + 11'b101_0000100: q = 4'b0000; + 11'b101_0000101: q = 4'b0000; + 11'b101_0000110: q = 4'b0000; + 11'b101_0000111: q = 4'b0000; + 11'b101_0001000: q = 4'b0000; + 11'b101_0001001: q = 4'b0000; + 11'b101_0001010: q = 4'b0000; + 11'b101_0001011: q = 4'b0000; + 11'b101_0001100: q = 4'b0100; + 11'b101_0001101: q = 4'b0100; + 11'b101_0001110: q = 4'b0100; + 11'b101_0001111: q = 4'b0100; + 11'b101_0010000: q = 4'b0100; + 11'b101_0010001: q = 4'b0100; + 11'b101_0010010: q = 4'b0100; + 11'b101_0010011: q = 4'b0100; + 11'b101_0010100: q = 4'b0100; + 11'b101_0010101: q = 4'b0100; + 11'b101_0010110: q = 4'b0100; + 11'b101_0010111: q = 4'b0100; + 11'b101_0011000: q = 4'b0100; + 11'b101_0011001: q = 4'b0100; + 11'b101_0011010: q = 4'b0100; + 11'b101_0011011: q = 4'b0100; + 11'b101_0011100: q = 4'b0100; + 11'b101_0011101: q = 4'b0100; + 11'b101_0011110: q = 4'b0100; + 11'b101_0011111: q = 4'b0100; + 11'b101_0100000: q = 4'b0100; + 11'b101_0100001: q = 4'b0100; + 11'b101_0100010: q = 4'b0100; + 11'b101_0100011: q = 4'b0100; + 11'b101_0100100: q = 4'b0100; + 11'b101_0100101: q = 4'b0100; + 11'b101_0100110: q = 4'b0100; + 11'b101_0100111: q = 4'b0100; + 11'b101_0101000: q = 4'b1000; + 11'b101_0101001: q = 4'b1000; + 11'b101_0101010: q = 4'b1000; + 11'b101_0101011: q = 4'b1000; + 11'b101_0101100: q = 4'b1000; + 11'b101_0101101: q = 4'b1000; + 11'b101_0101110: q = 4'b1000; + 11'b101_0101111: q = 4'b1000; + 11'b101_0110000: q = 4'b1000; + 11'b101_0110001: q = 4'b1000; + 11'b101_0110010: q = 4'b1000; + 11'b101_0110011: q = 4'b1000; + 11'b101_0110100: q = 4'b1000; + 11'b101_0110101: q = 4'b1000; + 11'b101_0110110: q = 4'b1000; + 11'b101_0110111: q = 4'b1000; + 11'b101_0111000: q = 4'b1000; + 11'b101_0111001: q = 4'b1000; + 11'b101_0111010: q = 4'b1000; + 11'b101_0111011: q = 4'b1000; + 11'b101_0111100: q = 4'b1000; + 11'b101_0111101: q = 4'b1000; + 11'b101_0111110: q = 4'b1000; + 11'b101_0111111: q = 4'b1000; + 11'b101_1000000: q = 4'b0001; + 11'b101_1000001: q = 4'b0001; + 11'b101_1000010: q = 4'b0001; + 11'b101_1000011: q = 4'b0001; + 11'b101_1000100: q = 4'b0001; + 11'b101_1000101: q = 4'b0001; + 11'b101_1000110: q = 4'b0001; + 11'b101_1000111: q = 4'b0001; + 11'b101_1001000: q = 4'b0001; + 11'b101_1001001: q = 4'b0001; + 11'b101_1001010: q = 4'b0001; + 11'b101_1001011: q = 4'b0001; + 11'b101_1001100: q = 4'b0001; + 11'b101_1001101: q = 4'b0001; + 11'b101_1001110: q = 4'b0001; + 11'b101_1001111: q = 4'b0001; + 11'b101_1010000: q = 4'b0001; + 11'b101_1010001: q = 4'b0001; + 11'b101_1010010: q = 4'b0001; + 11'b101_1010011: q = 4'b0001; + 11'b101_1010100: q = 4'b0001; + 11'b101_1010101: q = 4'b0001; + 11'b101_1010110: q = 4'b0001; + 11'b101_1010111: q = 4'b0001; + 11'b101_1011000: q = 4'b0010; + 11'b101_1011001: q = 4'b0010; + 11'b101_1011010: q = 4'b0010; + 11'b101_1011011: q = 4'b0010; + 11'b101_1011100: q = 4'b0010; + 11'b101_1011101: q = 4'b0010; + 11'b101_1011110: q = 4'b0010; + 11'b101_1011111: q = 4'b0010; + 11'b101_1100000: q = 4'b0010; + 11'b101_1100001: q = 4'b0010; + 11'b101_1100010: q = 4'b0010; + 11'b101_1100011: q = 4'b0010; + 11'b101_1100100: q = 4'b0010; + 11'b101_1100101: q = 4'b0010; + 11'b101_1100110: q = 4'b0010; + 11'b101_1100111: q = 4'b0010; + 11'b101_1101000: q = 4'b0010; + 11'b101_1101001: q = 4'b0010; + 11'b101_1101010: q = 4'b0010; + 11'b101_1101011: q = 4'b0010; + 11'b101_1101100: q = 4'b0010; + 11'b101_1101101: q = 4'b0010; + 11'b101_1101110: q = 4'b0010; + 11'b101_1101111: q = 4'b0010; + 11'b101_1110000: q = 4'b0000; + 11'b101_1110001: q = 4'b0000; + 11'b101_1110010: q = 4'b0000; + 11'b101_1110011: q = 4'b0000; + 11'b101_1110100: q = 4'b0000; + 11'b101_1110101: q = 4'b0000; + 11'b101_1110110: q = 4'b0000; + 11'b101_1110111: q = 4'b0000; + 11'b101_1111000: q = 4'b0000; + 11'b101_1111001: q = 4'b0000; + 11'b101_1111010: q = 4'b0000; + 11'b101_1111011: q = 4'b0000; + 11'b101_1111100: q = 4'b0000; + 11'b101_1111101: q = 4'b0000; + 11'b101_1111110: q = 4'b0000; + 11'b101_1111111: q = 4'b0000; + 11'b110_0000000: q = 4'b0000; + 11'b110_0000001: q = 4'b0000; + 11'b110_0000010: q = 4'b0000; + 11'b110_0000011: q = 4'b0000; + 11'b110_0000100: q = 4'b0000; + 11'b110_0000101: q = 4'b0000; + 11'b110_0000110: q = 4'b0000; + 11'b110_0000111: q = 4'b0000; + 11'b110_0001000: q = 4'b0000; + 11'b110_0001001: q = 4'b0000; + 11'b110_0001010: q = 4'b0000; + 11'b110_0001011: q = 4'b0000; + 11'b110_0001100: q = 4'b0000; + 11'b110_0001101: q = 4'b0000; + 11'b110_0001110: q = 4'b0000; + 11'b110_0001111: q = 4'b0000; + 11'b110_0010000: q = 4'b0100; + 11'b110_0010001: q = 4'b0100; + 11'b110_0010010: q = 4'b0100; + 11'b110_0010011: q = 4'b0100; + 11'b110_0010100: q = 4'b0100; + 11'b110_0010101: q = 4'b0100; + 11'b110_0010110: q = 4'b0100; + 11'b110_0010111: q = 4'b0100; + 11'b110_0011000: q = 4'b0100; + 11'b110_0011001: q = 4'b0100; + 11'b110_0011010: q = 4'b0100; + 11'b110_0011011: q = 4'b0100; + 11'b110_0011100: q = 4'b0100; + 11'b110_0011101: q = 4'b0100; + 11'b110_0011110: q = 4'b0100; + 11'b110_0011111: q = 4'b0100; + 11'b110_0100000: q = 4'b0100; + 11'b110_0100001: q = 4'b0100; + 11'b110_0100010: q = 4'b0100; + 11'b110_0100011: q = 4'b0100; + 11'b110_0100100: q = 4'b0100; + 11'b110_0100101: q = 4'b0100; + 11'b110_0100110: q = 4'b0100; + 11'b110_0100111: q = 4'b0100; + 11'b110_0101000: q = 4'b1000; + 11'b110_0101001: q = 4'b1000; + 11'b110_0101010: q = 4'b1000; + 11'b110_0101011: q = 4'b1000; + 11'b110_0101100: q = 4'b1000; + 11'b110_0101101: q = 4'b1000; + 11'b110_0101110: q = 4'b1000; + 11'b110_0101111: q = 4'b1000; + 11'b110_0110000: q = 4'b1000; + 11'b110_0110001: q = 4'b1000; + 11'b110_0110010: q = 4'b1000; + 11'b110_0110011: q = 4'b1000; + 11'b110_0110100: q = 4'b1000; + 11'b110_0110101: q = 4'b1000; + 11'b110_0110110: q = 4'b1000; + 11'b110_0110111: q = 4'b1000; + 11'b110_0111000: q = 4'b1000; + 11'b110_0111001: q = 4'b1000; + 11'b110_0111010: q = 4'b1000; + 11'b110_0111011: q = 4'b1000; + 11'b110_0111100: q = 4'b1000; + 11'b110_0111101: q = 4'b1000; + 11'b110_0111110: q = 4'b1000; + 11'b110_0111111: q = 4'b1000; + 11'b110_1000000: q = 4'b0001; + 11'b110_1000001: q = 4'b0001; + 11'b110_1000010: q = 4'b0001; + 11'b110_1000011: q = 4'b0001; + 11'b110_1000100: q = 4'b0001; + 11'b110_1000101: q = 4'b0001; + 11'b110_1000110: q = 4'b0001; + 11'b110_1000111: q = 4'b0001; + 11'b110_1001000: q = 4'b0001; + 11'b110_1001001: q = 4'b0001; + 11'b110_1001010: q = 4'b0001; + 11'b110_1001011: q = 4'b0001; + 11'b110_1001100: q = 4'b0001; + 11'b110_1001101: q = 4'b0001; + 11'b110_1001110: q = 4'b0001; + 11'b110_1001111: q = 4'b0001; + 11'b110_1010000: q = 4'b0001; + 11'b110_1010001: q = 4'b0001; + 11'b110_1010010: q = 4'b0001; + 11'b110_1010011: q = 4'b0001; + 11'b110_1010100: q = 4'b0010; + 11'b110_1010101: q = 4'b0010; + 11'b110_1010110: q = 4'b0010; + 11'b110_1010111: q = 4'b0010; + 11'b110_1011000: q = 4'b0010; + 11'b110_1011001: q = 4'b0010; + 11'b110_1011010: q = 4'b0010; + 11'b110_1011011: q = 4'b0010; + 11'b110_1011100: q = 4'b0010; + 11'b110_1011101: q = 4'b0010; + 11'b110_1011110: q = 4'b0010; + 11'b110_1011111: q = 4'b0010; + 11'b110_1100000: q = 4'b0010; + 11'b110_1100001: q = 4'b0010; + 11'b110_1100010: q = 4'b0010; + 11'b110_1100011: q = 4'b0010; + 11'b110_1100100: q = 4'b0010; + 11'b110_1100101: q = 4'b0010; + 11'b110_1100110: q = 4'b0010; + 11'b110_1100111: q = 4'b0010; + 11'b110_1101000: q = 4'b0010; + 11'b110_1101001: q = 4'b0010; + 11'b110_1101010: q = 4'b0010; + 11'b110_1101011: q = 4'b0010; + 11'b110_1101100: q = 4'b0010; + 11'b110_1101101: q = 4'b0010; + 11'b110_1101110: q = 4'b0010; + 11'b110_1101111: q = 4'b0010; + 11'b110_1110000: q = 4'b0000; + 11'b110_1110001: q = 4'b0000; + 11'b110_1110010: q = 4'b0000; + 11'b110_1110011: q = 4'b0000; + 11'b110_1110100: q = 4'b0000; + 11'b110_1110101: q = 4'b0000; + 11'b110_1110110: q = 4'b0000; + 11'b110_1110111: q = 4'b0000; + 11'b110_1111000: q = 4'b0000; + 11'b110_1111001: q = 4'b0000; + 11'b110_1111010: q = 4'b0000; + 11'b110_1111011: q = 4'b0000; + 11'b110_1111100: q = 4'b0000; + 11'b110_1111101: q = 4'b0000; + 11'b110_1111110: q = 4'b0000; + 11'b110_1111111: q = 4'b0000; + 11'b111_0000000: q = 4'b0000; + 11'b111_0000001: q = 4'b0000; + 11'b111_0000010: q = 4'b0000; + 11'b111_0000011: q = 4'b0000; + 11'b111_0000100: q = 4'b0000; + 11'b111_0000101: q = 4'b0000; + 11'b111_0000110: q = 4'b0000; + 11'b111_0000111: q = 4'b0000; + 11'b111_0001000: q = 4'b0000; + 11'b111_0001001: q = 4'b0000; + 11'b111_0001010: q = 4'b0000; + 11'b111_0001011: q = 4'b0000; + 11'b111_0001100: q = 4'b0000; + 11'b111_0001101: q = 4'b0000; + 11'b111_0001110: q = 4'b0000; + 11'b111_0001111: q = 4'b0000; + 11'b111_0010000: q = 4'b0100; + 11'b111_0010001: q = 4'b0100; + 11'b111_0010010: q = 4'b0100; + 11'b111_0010011: q = 4'b0100; + 11'b111_0010100: q = 4'b0100; + 11'b111_0010101: q = 4'b0100; + 11'b111_0010110: q = 4'b0100; + 11'b111_0010111: q = 4'b0100; + 11'b111_0011000: q = 4'b0100; + 11'b111_0011001: q = 4'b0100; + 11'b111_0011010: q = 4'b0100; + 11'b111_0011011: q = 4'b0100; + 11'b111_0011100: q = 4'b0100; + 11'b111_0011101: q = 4'b0100; + 11'b111_0011110: q = 4'b0100; + 11'b111_0011111: q = 4'b0100; + 11'b111_0100000: q = 4'b0100; + 11'b111_0100001: q = 4'b0100; + 11'b111_0100010: q = 4'b0100; + 11'b111_0100011: q = 4'b0100; + 11'b111_0100100: q = 4'b0100; + 11'b111_0100101: q = 4'b0100; + 11'b111_0100110: q = 4'b0100; + 11'b111_0100111: q = 4'b0100; + 11'b111_0101000: q = 4'b0100; + 11'b111_0101001: q = 4'b0100; + 11'b111_0101010: q = 4'b0100; + 11'b111_0101011: q = 4'b0100; + 11'b111_0101100: q = 4'b1000; + 11'b111_0101101: q = 4'b1000; + 11'b111_0101110: q = 4'b1000; + 11'b111_0101111: q = 4'b1000; + 11'b111_0110000: q = 4'b1000; + 11'b111_0110001: q = 4'b1000; + 11'b111_0110010: q = 4'b1000; + 11'b111_0110011: q = 4'b1000; + 11'b111_0110100: q = 4'b1000; + 11'b111_0110101: q = 4'b1000; + 11'b111_0110110: q = 4'b1000; + 11'b111_0110111: q = 4'b1000; + 11'b111_0111000: q = 4'b1000; + 11'b111_0111001: q = 4'b1000; + 11'b111_0111010: q = 4'b1000; + 11'b111_0111011: q = 4'b1000; + 11'b111_0111100: q = 4'b1000; + 11'b111_0111101: q = 4'b1000; + 11'b111_0111110: q = 4'b1000; + 11'b111_0111111: q = 4'b1000; + 11'b111_1000000: q = 4'b0001; + 11'b111_1000001: q = 4'b0001; + 11'b111_1000010: q = 4'b0001; + 11'b111_1000011: q = 4'b0001; + 11'b111_1000100: q = 4'b0001; + 11'b111_1000101: q = 4'b0001; + 11'b111_1000110: q = 4'b0001; + 11'b111_1000111: q = 4'b0001; + 11'b111_1001000: q = 4'b0001; + 11'b111_1001001: q = 4'b0001; + 11'b111_1001010: q = 4'b0001; + 11'b111_1001011: q = 4'b0001; + 11'b111_1001100: q = 4'b0001; + 11'b111_1001101: q = 4'b0001; + 11'b111_1001110: q = 4'b0001; + 11'b111_1001111: q = 4'b0001; + 11'b111_1010000: q = 4'b0001; + 11'b111_1010001: q = 4'b0001; + 11'b111_1010010: q = 4'b0010; + 11'b111_1010011: q = 4'b0010; + 11'b111_1010100: q = 4'b0010; + 11'b111_1010101: q = 4'b0010; + 11'b111_1010110: q = 4'b0010; + 11'b111_1010111: q = 4'b0010; + 11'b111_1011000: q = 4'b0010; + 11'b111_1011001: q = 4'b0010; + 11'b111_1011010: q = 4'b0010; + 11'b111_1011011: q = 4'b0010; + 11'b111_1011100: q = 4'b0010; + 11'b111_1011101: q = 4'b0010; + 11'b111_1011110: q = 4'b0010; + 11'b111_1011111: q = 4'b0010; + 11'b111_1100000: q = 4'b0010; + 11'b111_1100001: q = 4'b0010; + 11'b111_1100010: q = 4'b0010; + 11'b111_1100011: q = 4'b0010; + 11'b111_1100100: q = 4'b0010; + 11'b111_1100101: q = 4'b0010; + 11'b111_1100110: q = 4'b0010; + 11'b111_1100111: q = 4'b0010; + 11'b111_1101000: q = 4'b0010; + 11'b111_1101001: q = 4'b0010; + 11'b111_1101010: q = 4'b0010; + 11'b111_1101011: q = 4'b0010; + 11'b111_1101100: q = 4'b0010; + 11'b111_1101101: q = 4'b0010; + 11'b111_1101110: q = 4'b0010; + 11'b111_1101111: q = 4'b0010; + 11'b111_1110000: q = 4'b0000; + 11'b111_1110001: q = 4'b0000; + 11'b111_1110010: q = 4'b0000; + 11'b111_1110011: q = 4'b0000; + 11'b111_1110100: q = 4'b0000; + 11'b111_1110101: q = 4'b0000; + 11'b111_1110110: q = 4'b0000; + 11'b111_1110111: q = 4'b0000; + 11'b111_1111000: q = 4'b0000; + 11'b111_1111001: q = 4'b0000; + 11'b111_1111010: q = 4'b0000; + 11'b111_1111011: q = 4'b0000; + 11'b111_1111100: q = 4'b0000; + 11'b111_1111101: q = 4'b0000; + 11'b111_1111110: q = 4'b0000; + 11'b111_1111111: q = 4'b0000; + endcase diff --git a/pipelined/srt/sim-srt b/pipelined/srt/sim-srt new file mode 100755 index 000000000..d0d5236a8 --- /dev/null +++ b/pipelined/srt/sim-srt @@ -0,0 +1,2 @@ +vsim -do "do srt.do" + diff --git a/pipelined/srt/sim-srt-batch b/pipelined/srt/sim-srt-batch new file mode 100755 index 000000000..082c83891 --- /dev/null +++ b/pipelined/srt/sim-srt-batch @@ -0,0 +1 @@ +vsim -c -do "do srt.do" diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen new file mode 100755 index 000000000..dadc5dc5c Binary files /dev/null and b/pipelined/srt/sqrttestgen differ diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c new file mode 100644 index 000000000..b4ece1473 --- /dev/null +++ b/pipelined/srt/sqrttestgen.c @@ -0,0 +1,100 @@ +/* sqrttestgen.c */ + +/* Written 19 October 2021 David_Harris@hmc.edu + + This program creates test vectors for mantissa component + of an IEEE floating point square root. + */ + +/* #includes */ + +#include +#include +#include + +/* Constants */ + +#define ENTRIES 17 +#define RANDOM_VECS 500 + +/* Prototypes */ + +void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac); +void printhex(FILE *fptr, double x); +double random_input(void); + +/* Main */ + +void main(void) +{ + FILE *fptr; + double aFrac, rFrac; + int aExp, rExp; + double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, + 1.75, 1.875, 1.99999, + 1.1, 1.2, 1.01, 1.001, 1.0001, +<<<<<<< Updated upstream + 1/1.1, 1/1.5, 1/1.25, 1/1.125}; +======= + 2/1.1, 2/1.5, 2/1.25, 2/1.125}; +>>>>>>> Stashed changes + double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16}; + int i; + int bias = 1023; + + if ((fptr = fopen("sqrttestvectors","w")) == NULL) { + fprintf(stderr, "Couldn't write sqrttestvectors file\n"); + exit(1); + } + + for (i=0; i2) m /= 2; + for (i=0; i<52; i+=4) { + m = m - floor(m); + m = m * 16; + val = (int)(m)%16; + fprintf(fptr, "%x", val); + } +} + +double random_input(void) +{ + return 1.0 + rand()/32767.0; +} + diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do new file mode 100644 index 000000000..340c5b1f7 --- /dev/null +++ b/pipelined/srt/srt-waves.do @@ -0,0 +1,5 @@ +add wave -noupdate /testbench/* +add wave -noupdate /testbench/srt/* +add wave -noupdate /testbench/srt/otfc2/* +add wave -noupdate /testbench/srt/preproc/* +add wave -noupdate /testbench/srt/divcounter/* diff --git a/pipelined/srt/srt.do b/pipelined/srt/srt.do new file mode 100644 index 000000000..6097cf47d --- /dev/null +++ b/pipelined/srt/srt.do @@ -0,0 +1,28 @@ +# srt.do +# +# David_Harris@hmc.edu 19 October 2021 + +# Use this wally-pipelined.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do wally-pipelined.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do wally-pipelined.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +vlog +incdir+../config/rv64gc +incdir+../config/shared srt.sv testbench.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv +vopt +acc work.testbench -o workopt +vsim workopt + +-- display input and output signals as hexidecimal values +do ./srt-waves.do + +-- Run the Simulation +run -all diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv new file mode 100644 index 000000000..a980511ff --- /dev/null +++ b/pipelined/srt/srt.sv @@ -0,0 +1,334 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu 13 January 2022 +// Modified: cturek@hmc.edu June 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" +`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN)) + +module srt ( + input logic clk, + input logic Start, + input logic Stall, // *** multiple pipe stages + input logic Flush, // *** multiple pipe stages + // Floating Point Inputs + // later add exponents, signs, special cases + input logic XSign, YSign, + input logic [`NE-1:0] XExp, YExp, + input logic [`NF-1:0] SrcXFrac, SrcYFrac, + input logic [`XLEN-1:0] SrcA, SrcB, + input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit + input logic W64, // 32-bit ints on XLEN=64 + input logic Signed, // Interpret integers as signed 2's complement + input logic Int, // Choose integer inputs + input logic Sqrt, // perform square root, not divide + output logic rsign, done, + output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers + output logic [`NE-1:0] rExp, + output logic [3:0] Flags +); + + logic qp, qz, qm; // quotient is +1, 0, or -1 + logic [`NE-1:0] calcExp; + logic calcSign; + logic [`DIVLEN+3:0] X, Dpreproc; + logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; + logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; + logic intSign; + + srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); + + // Top Muxes and Registers + // When start is asserted, the inputs are loaded into the divider. + // Otherwise, the divisor is retained and the partial remainder + // is fed back for the next iteration. + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+2:0], 1'b0}, X, Start, WSN); + flop #(`DIVLEN+4) wsflop(clk, WSN, WS); + mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+2:0], 1'b0}, {(`DIVLEN+4){1'b0}}, Start, WCN); + flop #(`DIVLEN+4) wcflop(clk, WCN, WC); + flopen #(`DIVLEN+4) dflop(clk, Start, Dpreproc, D); + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm); + + flopen #(`NE) expflop(clk, Start, calcExp, rExp); + flopen #(1) signflop(clk, Start, calcSign, rsign); + flopen #(7) durflop(clk, Start, calcDur, dur); + + counter divcounter(clk, Start, dur, done); + + // Divisor Selection logic + assign Db = ~D; + mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); + + // Partial Product Generation + csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + + otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + +<<<<<<< Updated upstream + expcalc expcalc(.XExp, .YExp, .calcExp); +======= + expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); +>>>>>>> Stashed changes + + signcalc signcalc(.XSign, .YSign, .calcSign); +endmodule + +//////////////// +// Submodules // +//////////////// + +/////////////////// +// Preprocessing // +/////////////////// +module srtpreproc ( + input logic [`XLEN-1:0] SrcA, SrcB, + input logic [`NF-1:0] SrcXFrac, SrcYFrac, + input logic [`NE-1:0] XExp, + input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit + input logic W64, // 32-bit ints on XLEN=64 + input logic Signed, // Interpret integers as signed 2's complement + input logic Int, // Choose integer inputs + input logic Sqrt, // perform square root, not divide + output logic [`DIVLEN+3:0] X, D, + output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent + output logic intSign // Quotient integer sign +); + + logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; + logic [`XLEN-1:0] PosA, PosB; + logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX; + + assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; + assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; + + lzc #(`XLEN) lzcA (PosA, zeroCntA); + lzc #(`XLEN) lzcB (PosB, zeroCntB); + + assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}}; + assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}}; + + assign PreprocA = ExtraA << (zeroCntA + 1); + assign PreprocB = ExtraB << (zeroCntB + 1); + assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}}; + assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}}; + + assign DivX = Int ? PreprocA : PreprocX; + assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac}; + + assign X = Sqrt ? SqrtX : {4'b0001, DivX}; + assign D = {4'b0001, Int ? PreprocB : PreprocY}; + assign intExp = zeroCntB - zeroCntA + 1; + assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + + assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2); +endmodule + +///////////////////////////////// +// Quotient Selection, Radix 2 // +///////////////////////////////// +module qsel2 ( // *** eventually just change to 4 bits + input logic [`DIVLEN+3:`DIVLEN] ps, pc, + output logic qp, qz, qm +); + + logic [`DIVLEN+3:`DIVLEN] p, g; + logic magnitude, sign, cout; + + // The quotient selection logic is presented for simplicity, not + // for efficiency. You can probably optimize your logic to + // select the proper divisor with less delay. + + // Quotient equations from EE371 lecture notes 13-20 + assign p = ps ^ pc; + assign g = ps & pc; + + assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); + assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); + assign #1 sign = p[`DIVLEN+3] ^ cout; +/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & + (ps[52]^pc[52])); + assign #1 sign = (ps[55]^pc[55])^ + (ps[54] & pc[54] | ((ps[54]^pc[54]) & + (ps[53]&pc[53] | ((ps[53]^pc[53]) & + (ps[52]&pc[52]))))); */ + + // Produce quotient = +1, 0, or -1 + assign #1 qp = magnitude & ~sign; + assign #1 qz = ~magnitude; + assign #1 qm = magnitude & sign; +endmodule + +/////////////////////////////////// +// On-The-Fly Converter, Radix 2 // +/////////////////////////////////// +module otfc2 #(parameter N=64) ( + input logic clk, + input logic Start, + input logic qp, qz, qm, + output logic [N-1:0] r +); + + // The on-the-fly converter transfers the quotient + // bits to the quotient as they come. + // + // This code follows the psuedocode presented in the + // floating point chapter of the book. Right now, + // it is written for Radix-2 division. + // + // QM is Q-1. It allows us to write negative bits + // without using a costly CPA. + logic [N+2:0] Q, QM, QNext, QMNext, QMMux; + // QR and QMR are the shifted versions of Q and QM. + // They are treated as [N-1:r] size signals, and + // discard the r most significant bits of Q and QM. + logic [N+1:0] QR, QMR; + + flopr #(N+3) Qreg(clk, Start, QNext, Q); + mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); + flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); + + always_comb begin + QR = Q[N+1:0]; + QMR = QM[N+1:0]; // Shift Q and QM + if (qp) begin + QNext = {QR, 1'b1}; + QMNext = {QR, 1'b0}; + end else if (qz) begin + QNext = {QR, 1'b0}; + QMNext = {QMR, 1'b1}; + end else begin // If qp and qz are not true, then qm is + QNext = {QMR, 1'b1}; + QMNext = {QMR, 1'b0}; + end + end + assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; + +endmodule + +///////////// +// counter // +///////////// +module counter(input logic clk, + input logic req, + input logic [$clog2(`XLEN+1)-1:0] dur, + output logic done); + + logic [$clog2(`XLEN+1)-1:0] count; + + // This block of control logic sequences the divider + // through its iterations. You may modify it if you + // build a divider which completes in fewer iterations. + // You are not responsible for the (trivial) circuit + // design of the block. + + always @(posedge clk) + begin + if (count == dur) done <= #1 1; + else if (done | req) done <= #1 0; + if (req) count <= #1 0; + else count <= #1 count+1; + end +endmodule + +////////// +// mux3 // +////////// +module mux3onehot #(parameter N=65) ( + input logic [N+3:0] in0, in1, in2, + input logic sel0, sel1, sel2, + output logic [N+3:0] out +); + + // lazy inspection of the selects + // really we should make sure selects are mutually exclusive + assign #1 out = sel0 ? in0 : (sel1 ? in1 : in2); +endmodule + + +///////// +// csa // +///////// +module csa #(parameter N=69) ( + input logic [N-1:0] in1, in2, in3, + input logic cin, + output logic [N-1:0] out1, out2 +); + + // This block adds in1, in2, in3, and cin to produce + // a result out1 / out2 in carry-save redundant form. + // cin is just added to the least significant bit and + // is required to handle adding a negative divisor. + // Fortunately, the carry (out2) is shifted left by one + // bit, leaving room in the least significant bit to + // insert cin. + + assign #1 out1 = in1 ^ in2 ^ in3; + assign #1 out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | + (in2[N-2:0] & in3[N-2:0]), cin}; +endmodule + + +////////////// +// expcalc // +////////////// +module expcalc( +<<<<<<< Updated upstream + input logic [`NE-1:0] XExp, YExp, + output logic [`NE-1:0] calcExp +); + + assign calcExp = XExp - YExp + (`NE)'(`BIAS); +======= + input logic [`NE-1:0] XExp, YExp, + input logic Sqrt, + output logic [`NE-1:0] calcExp +); + logic [`NE-1:0] SExp, DExp, SXExp; + assign SXExp = XExp - (`NE)'(`BIAS); + assign SExp = {1'b0, SXExp[`NE-1:1]} + (`NE)'(`BIAS); + assign DExp = XExp - YExp + (`NE)'(`BIAS); + assign calcExp = Sqrt ? SExp : DExp; +>>>>>>> Stashed changes + +endmodule + +////////////// +// signcalc // +////////////// +module signcalc( + input logic XSign, YSign, + output logic calcSign +); + + assign calcSign = XSign ^ YSign; + +endmodule \ No newline at end of file diff --git a/pipelined/srt/srt_stanford.sv b/pipelined/srt/srt_stanford.sv new file mode 100644 index 000000000..ce0417f56 --- /dev/null +++ b/pipelined/srt/srt_stanford.sv @@ -0,0 +1,355 @@ +/////////////////////////////////////////////////////// +// srt.sv // +// // +// Written 10/31/96 by David Harris harrisd@leland // +// Updated 10/19/21 David_Harris@hmc.edu // +// // +// This file models a simple Radix 2 SRT divider. // +// // +/////////////////////////////////////////////////////// + +// This Verilog file models a radix 2 SRT divider which +// produces one quotient digit per cycle. The divider +// keeps the partial remainder in carry-save form. + +///////// +// srt // +///////// +module srt(input logic clk, + input logic req, + input logic sqrt, // 1 to compute sqrt(a), 0 to compute a/b + input logic [51:0] a, b, + output logic [54:0] rp, rm); + + // A simple Radix 2 SRT divider/sqrt + + + // Internal signals + + logic [55:0] ps, pc; // partial remainder in carry-save form + logic [55:0] d; // divisor + logic [55:0] psa, pca; // partial remainder result of csa + logic [55:0] psn, pcn; // partial remainder for next cycle + logic [55:0] dn; // divisor for next cycle + logic [55:0] dsel; // selected divisor multiple + logic qp, qz, qm; // quotient is +1, 0, or -1 + logic [55:0] d_b; // inverse of divisor + + // Top Muxes and Registers + // When start is asserted, the inputs are loaded into the divider. + // Otherwise, the divisor is retained and the partial remainder + // is fed back for the next iteration. + mux2 psmux({psa[54:0], 1'b0}, {4'b0001, a}, req, psn); + flop psflop(clk, psn, ps); + mux2 pcmux({pca[54:0], 1'b0}, 56'b0, req, pcn); + flop pcflop(clk, pcn, pc); + mux2 dmux(d, {4'b0001, b}, req, dn); + flop dflop(clk, dn, d); + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + // Accumulate quotient digits in a shift register + qsel qsel(ps[55:52], pc[55:52], qp, qz, qm); + qacc qacc(clk, req, qp, qz, qm, rp, rm); + + // Divisor Selection logic + inv dinv(d, d_b); + mux3 divisorsel(d_b, 56'b0, d, qp, qz, qm, dsel); + + // Partial Product Generation + csa csa(ps, pc, dsel, qp, psa, pca); +endmodule + +////////// +// mux2 // +////////// +module mux2(input logic [55:0] in0, in1, + input logic sel, + output logic [55:0] out); + + assign #1 out = sel ? in1 : in0; +endmodule + +////////// +// flop // +////////// +module flop(clk, in, out); + input clk; + input [55:0] in; + output [55:0] out; + + logic [55:0] state; + + always @(posedge clk) + state <= #1 in; + + assign #1 out = state; +endmodule + +////////// +// qsel // +////////// +module qsel(input logic [55:52] ps, pc, + output logic qp, qz, qm); + + logic [55:52] p, g; + logic magnitude, sign, cout; + + // The quotient selection logic is presented for simplicity, not + // for efficiency. You can probably optimize your logic to + // select the proper divisor with less delay. + + // Quotient equations from EE371 lecture notes 13-20 + assign p = ps ^ pc; + assign g = ps & pc; + + assign #1 magnitude = ~(&p[54:52]); + assign #1 cout = g[54] | (p[54] & (g[53] | p[53] & g[52])); + assign #1 sign = p[55] ^ cout; +/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & + (ps[52]^pc[52])); + assign #1 sign = (ps[55]^pc[55])^ + (ps[54] & pc[54] | ((ps[54]^pc[54]) & + (ps[53]&pc[53] | ((ps[53]^pc[53]) & + (ps[52]&pc[52]))))); */ + + // Produce quotient = +1, 0, or -1 + assign #1 qp = magnitude & ~sign; + assign #1 qz = ~magnitude; + assign #1 qm = magnitude & sign; +endmodule + +////////// +// qacc // +////////// +module qacc(clk, req, qp, qz, qm, rp, rm); + input clk; + input req; + input qp; + input qz; + input qm; + output [54:0] rp; + output [54:0] rm; + + logic [54:0] rp, rm; // quotient bit is +/- 1; + logic [7:0] count; + + always @(posedge clk) + begin + if (req) + begin + rp <= #1 0; + rm <= #1 0; + end + else + begin + rp <= #1 {rp[54:0], qp}; + rm <= #1 {rm[54:0], qm}; + end + end +endmodule + +///////// +// inv // +///////// +module inv(input logic [55:0] in, + output logic [55:0] out); + + assign #1 out = ~in; +endmodule + +////////// +// mux3 // +////////// +module mux3(in0, in1, in2, sel0, sel1, sel2, out); + input [55:0] in0; + input [55:0] in1; + input [55:0] in2; + input sel0; + input sel1; + input sel2; + output [55:0] out; + + // lazy inspection of the selects + // really we should make sure selects are mutually exclusive + assign #1 out = sel0 ? in0 : (sel1 ? in1 : in2); +endmodule + +///////// +// csa // +///////// +module csa(in1, in2, in3, cin, out1, out2); + input [55:0] in1; + input [55:0] in2; + input [55:0] in3; + input cin; + output [55:0] out1; + output [55:0] out2; + + // This block adds in1, in2, in3, and cin to produce + // a result out1 / out2 in carry-save redundant form. + // cin is just added to the least significant bit and + // is required to handle adding a negative divisor. + // Fortunately, the carry (out2) is shifted left by one + // bit, leaving room in the least significant bit to + // insert cin. + + assign #1 out1 = in1 ^ in2 ^ in3; + assign #1 out2 = {in1[54:0] & (in2[54:0] | in3[54:0]) | + (in2[54:0] & in3[54:0]), cin}; +endmodule + +////////////// +// finaladd // +////////////// +module finaladd(rp, rm, r); + input [54:0] rp; + input [54:0] rm; + output [51:0] r; + + logic [54:0] diff; + + // this magic block performs the final addition for you + // to convert the positive and negative quotient digits + // into a normalized mantissa. It returns the 52 bit + // mantissa after shifting to guarantee a leading 1. + // You can assume this block operates in one cycle + // and do not need to budget it in your area and power + // calculations. + + // Since no rounding is performed, the result may be too + // small by one unit in the least significant place (ulp). + // The checker ignores such an error. + + assign #1 diff = rp - rm; + assign #1 r = diff[54] ? diff[53:2] : diff[52:1]; +endmodule + +///////////// +// counter // +///////////// +module counter(input logic clk, + input logic req, + output logic done); + + logic [5:0] count; + + // This block of control logic sequences the divider + // through its iterations. You may modify it if you + // build a divider which completes in fewer iterations. + // You are not responsible for the (trivial) circuit + // design of the block. + + always @(posedge clk) + begin + if (count == 54) done <= #1 1; + else if (done | req) done <= #1 0; + if (req) count <= #1 0; + else count <= #1 count+1; + end +endmodule + +/////////// +// clock // +/////////// +module clock(clk); + output clk; + + // Internal clk signal + logic clk; + +endmodule + +////////// +// testbench // +////////// +module testbench; + logic clk; + logic req; + logic done; + logic [51:0] a; + logic [51:0] b; + logic [51:0] r; + logic [54:0] rp, rm; // positive quotient digits + + // Test parameters + parameter MEM_SIZE = 40000; + parameter MEM_WIDTH = 52+52+52; + + `define memr 51:0 + `define memb 103:52 + `define mema 155:104 + + // Test logicisters + logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file + logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a + // bit field of an array + logic [51:0] correctr, nextr; + integer testnum, errors; + + // Divider + srt srt(clk, req, a, b, rp, rm); + + // Final adder converts quotient digits to 2's complement & normalizes + finaladd finaladd(rp, rm, r); + + // Counter + counter counter(clk, req, done); + + + initial + forever + begin + clk = 1; #17; + clk = 0; #16; + end + + + // Read test vectors from disk + initial + begin + testnum = 0; + errors = 0; + $readmemh ("testvectors", Tests); + Vec = Tests[testnum]; + a = Vec[`mema]; + b = Vec[`memb]; + nextr = Vec[`memr]; + req <= #5 1; + end + + // Apply directed test vectors read from file. + + always @(posedge clk) + begin + if (done) + begin + req <= #5 1; + $display("result was %h, should be %h\n", r, correctr); + if ((correctr - r) > 1) // check if accurate to 1 ulp + begin + errors = errors+1; + $display("failed\n"); + $stop; + end + if (a === 52'hxxxxxxxxxxxxx) + begin + $display("Tests completed successfully"); + $stop; + end + end + if (req) + begin + req <= #5 0; + correctr = nextr; + testnum = testnum+1; + Vec = Tests[testnum]; + $display("a = %h b = %h",a,b); + a = Vec[`mema]; + b = Vec[`memb]; + nextr = Vec[`memr]; + end + end + +endmodule + diff --git a/pipelined/srt/stine/Makefile b/pipelined/srt/stine/Makefile new file mode 100644 index 000000000..8f745b47a --- /dev/null +++ b/pipelined/srt/stine/Makefile @@ -0,0 +1,27 @@ + +CC = gcc +CFLAGS = -lm +LIBS = +OBJS4 = disp.o srt4div.o +OBJS2 = disp.o srt2div.o + +all: srt4div srt2div + +disp.o: disp.h disp.c + $(CC) -g -c -o disp.o disp.c + +srt4div.o: srt4div.c + $(CC) -g -c -o srt4div.o srt4div.c + +srt2div.o: srt2div.c + $(CC) -g -c -o srt2div.o srt2div.c + +srt4div: $(OBJS4) + $(CC) -g -O3 -o srt4div $(OBJS4) $(CFLAGS) + +srt2div: $(OBJS2) + $(CC) -g -O3 -o srt2div $(OBJS2) $(CFLAGS) + +clean: + rm -f *.o *~ + rm -f core diff --git a/pipelined/srt/stine/README b/pipelined/srt/stine/README new file mode 100755 index 000000000..6898c5cec --- /dev/null +++ b/pipelined/srt/stine/README @@ -0,0 +1 @@ +vsim -do iter64.do -c diff --git a/pipelined/srt/stine/README.md b/pipelined/srt/stine/README.md new file mode 100755 index 000000000..ebb006c95 --- /dev/null +++ b/pipelined/srt/stine/README.md @@ -0,0 +1,22 @@ +This is a novel integer divider using r4 division by recurrence. The +reference is: + +J. E. Stine and K. Hill, "An Efficient Implementation of Radix-4 +Integer Division Using Scaling," 2020 IEEE 63rd International Midwest +Symposium on Circuits and Systems (MWSCAS), Springfield, MA, USA, +2020, pp. 1092-1095, doi: 10.1109/MWSCAS48704.2020.9184631. + +Although this version does not contain scaling, it could do this, if +needed. Moreover, a higher radix or overlapped radix can be done +easily to expand the the size. Also, the implementations here are +initially unsigned but hope to expand for signed, which should be +easy. + +There are two types of tests in this directory within each testbench. +One tests for 32-bits and the other 64-bits: + +int32div.do and int64div.do = test individual vector for debugging + +iter32.do and iter64.do = do not use any waveform generation and just +output lots of tests + diff --git a/pipelined/srt/stine/checkme.sh b/pipelined/srt/stine/checkme.sh new file mode 100755 index 000000000..acbd94008 --- /dev/null +++ b/pipelined/srt/stine/checkme.sh @@ -0,0 +1,19 @@ +#!/bin/sh +cat iter64_signed.out | grep "0 1$" +cat iter64_signed.out | grep "1 0$" +cat iter64_signed.out | grep "0 0$" +cat iter64_unsigned.out | grep "0 1$" +cat iter64_unsigned.out | grep "1 0$" +cat iter64_unsigned.out | grep "0 0$" +cat iter32_signed.out | grep "0 1$" +cat iter32_signed.out | grep "1 0$" +cat iter32_signed.out | grep "0 0$" +cat iter32_unsigned.out | grep "0 1$" +cat iter32_unsigned.out | grep "1 0$" +cat iter32_unsigned.out | grep "0 0$" +cat iter128_signed.out | grep "0 1$" +cat iter128_signed.out | grep "1 0$" +cat iter128_signed.out | grep "0 0$" +cat iter128_unsigned.out | grep "0 1$" +cat iter128_unsigned.out | grep "1 0$" +cat iter128_unsigned.out | grep "0 0$" diff --git a/pipelined/srt/stine/disp.c b/pipelined/srt/stine/disp.c new file mode 100755 index 000000000..66652cf01 --- /dev/null +++ b/pipelined/srt/stine/disp.c @@ -0,0 +1,60 @@ +#include "disp.h" + +double rnd_zero(double x, double bits) { + if (x < 0) + return ceiling(x, bits); + else + return flr(x, bits); +} + +double rne(double x, double precision) { + double scale, x_round; + scale = pow(2.0, precision); + x_round = rint(x * scale) / scale; + return x_round; +} + +double flr(double x, double precision) { + double scale, x_round; + scale = pow(2.0, precision); + x_round = floor(x * scale) / scale; + return x_round; +} + +double ceiling(double x, double precision) { + double scale, x_round; + scale = pow(2.0, precision); + x_round = ceil(x * scale) / scale; + return x_round; +} + +void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file) { + + double diff; + int i; + if (fabs(x) < pow(2.0, -bits_to_right)) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + fprintf(out_file,"0"); + } + return; + } + if (x < 0.0) { + // fprintf(out_file, "-"); + // x = - x; + x = pow(2.0, ((double) bits_to_left)) + x; + } + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, -i); + if (x < diff) { + fprintf(out_file, "0"); + } + else { + fprintf(out_file, "1"); + x -= diff; + } + if (i == 0) { + fprintf(out_file, "."); + } + } +} + diff --git a/pipelined/srt/stine/disp.h b/pipelined/srt/stine/disp.h new file mode 100755 index 000000000..bf2c78872 --- /dev/null +++ b/pipelined/srt/stine/disp.h @@ -0,0 +1,18 @@ +#include +#include +#include + +#ifndef DISP +#define DISP + +double rnd_zero(double x, double bits); + +double rne(double x, double precision); + +double flr(double x, double precision); + +double ceiling(double x, double precision); + +void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file); + +#endif diff --git a/pipelined/srt/stine/idiv-config.vh b/pipelined/srt/stine/idiv-config.vh new file mode 100644 index 000000000..8afa1e75b --- /dev/null +++ b/pipelined/srt/stine/idiv-config.vh @@ -0,0 +1,27 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: james.stine@okstate.edu 9 June 2022 +// Modified: +// +// Purpose: Specify which features are configured +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +// Integer division tests +`define IDIV_TESTS 1048576 diff --git a/pipelined/srt/stine/intdiv.sv b/pipelined/srt/stine/intdiv.sv new file mode 100755 index 000000000..03c7b8977 --- /dev/null +++ b/pipelined/srt/stine/intdiv.sv @@ -0,0 +1,2802 @@ +/////////////////////////////////////////// +// intdiv.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +// *** I added these verilator controls to clean up the +// lint output. The linter warnings should be fixed, but now the output is at +// least readable. +/* verilator lint_off COMBDLY */ +/* verilator lint_off IMPLICIT */ + +`include "idiv-config.vh" + +module intdiv #(parameter WIDTH=64) + (Qf, done, remf, div0, N, D, clk, reset, start, S); + + input logic [WIDTH-1:0] N, D; + input logic clk; + input logic reset; + input logic start; + input logic S; + + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; + output logic div0; + output logic done; + + logic enable; + logic state0; + logic V; + logic [$clog2(WIDTH):0] Num; + logic [$clog2(WIDTH)-1:0] P, NumIter, RemShift, RemShiftP; + logic [WIDTH-1:0] op1, op2, op1shift, Rem5; + logic [WIDTH:0] Qd, Rd, Qd2, Rd2; + logic [WIDTH:0] Q2d, Qd3; + logic [WIDTH-1:0] Q, Q2, rem0; + logic [3:0] quotient; + logic otfzero; + logic shiftResult; + + logic [WIDTH-1:0] twoD; + logic [WIDTH-1:0] twoN; + logic SignD; + logic SignN; + logic [WIDTH-1:0] QT, remT; + logic D_NegOne; + logic Max_N; + logic [1:0] QR; + logic tcQ, tcR; + + // Check if negative (two's complement) + // If so, convert to positive + adder #(WIDTH) cpa1 ((D ^ {WIDTH{D[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, D[WIDTH-1]&S}, twoD); + adder #(WIDTH) cpa2 ((N ^ {WIDTH{N[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, N[WIDTH-1]&S}, twoN); + assign SignD = D[WIDTH-1]; + assign SignN = N[WIDTH-1]; + // Max N and D = -1 (Overflow) + assign Max_N = (~|N[WIDTH-2:0]) & N[WIDTH-1]; + assign D_NegOne = &D; + + // Divider goes the distance to 37 cycles + // (thanks to the evil divisor for D = 0x1) + // The enable signal turns off register storage thus invalidating + // any future cycles. + + // Shift D, if needed (for integer) + // needed to allow qst to be in range for integer + // division [1,2) and allow integer divide to work. + // + // The V or valid bit can be used to determine if D + // is 0 and thus a divide by 0 exception. This div0 + // exception is given to FSM to tell the operation to + // quit gracefully. + + lod_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD)); + shift_left #(WIDTH) p2 (twoD, P, op2); + assign op1 = twoN; + assign div0 = ~V; + + // #iter: N = m+v+s = m+2+s (mod k = 0) + // v = 2 since \rho < 1 (add 4 to make sure its a ceil) + // k = 2 (r = 2^k) + adder #($clog2(WIDTH)+1) cpa3 ({1'b0, P}, + {{$clog2(WIDTH)+1-3{1'b0}}, shiftResult, ~shiftResult, 1'b0}, + Num); + + // Determine whether need to add just Q/Rem + assign shiftResult = P[0]; + // div by 2 (ceil) + assign NumIter = Num[$clog2(WIDTH):1]; + assign RemShift = P; + + // Avoid critical path of RemShift + flopr #($clog2(WIDTH)) reg1 (clk, reset, RemShift, RemShiftP); + + // FSM to control integer divider + // assume inputs are postive edge and + // datapath (divider) is negative edge + fsm64 #($clog2(WIDTH)) fsm1 (enablev, state0v, donev, otfzerov, + start, div0, NumIter, ~clk, reset); + + flopr #(1) rega (~clk, reset, donev, done); + flopr #(1) regc (~clk, reset, otfzerov, otfzero); + flopr #(1) regd (~clk, reset, enablev, enable); + flopr #(1) rege (~clk, reset, state0v, state0); + + // To obtain a correct remainder the last bit of the + // quotient has to be aligned with a radix-r boundary. + // Since the quotient is in the range 1/2 < q < 2 (one + // integer bit and m fractional bits), this is achieved by + // shifting N right by v+s so that (m+v+s) mod k = 0. And, + // the quotient has to be aligned to the integer position. + divide4 #(WIDTH) p3 (Qd, Q2d, Rd, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + // Storage registers to hold contents stable + flopenr #(WIDTH+1) reg3 (clk, reset, enable, Rd, Rd2); + flopenr #(WIDTH+1) reg4 (clk, reset, enable, Qd, Qd2); + flopenr #(WIDTH+1) reg5 (clk, reset, enable, Q2d, Qd3); + + // Probably not needed - just assigns results + assign Q = Qd2[WIDTH-1:0]; + assign Rem5 = Rd2[WIDTH:1]; + assign Q2 = Qd3[WIDTH-1:0]; + + // Adjust remainder by m (no need to adjust by + shift_right #(WIDTH) p4 (Rem5, RemShiftP, rem0); + + // Adjust Q/Rem for Signed + always_comb + casex({S, SignN, SignD}) + 3'b000 : QR = 2'b00; + 3'b001 : QR = 2'b00; + 3'b010 : QR = 2'b00; + 3'b011 : QR = 2'b00; + 3'b100 : QR = 2'b00; + 3'b101 : QR = 2'b10; + 3'b110 : QR = 2'b11; + 3'b111 : QR = 2'b01; + default: QR = 2'b00; + endcase // casex ({SignN, SignD, S}) + assign {tcQ, tcR} = QR; + + // When Dividend (N) and/or Divisor (D) are negative (first bit is '1'): + // - When N and D are negative: Remainder i + // s negative (undergoes a two's complement). + // - When N is negative: Quotient and Remainder are both negative (undergo a two's complement). + // - When D is negative: Quotient is negative (undergoes a two's complement). + adder #(WIDTH) cpa4 ((rem0 ^ {WIDTH{tcR}}), {{WIDTH-1{1'b0}}, tcR}, remT); + adder #(WIDTH) cpa5 ((Q ^ {WIDTH{tcQ}}), {{WIDTH-1{1'b0}}, tcQ}, QT); + + // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec) + exception_int #(WIDTH) exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf); + +endmodule // intdiv + +// Division by Recurrence (r=4) +module divide4 #(parameter WIDTH=64) + (Q, Q2, rem0, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + input logic [WIDTH-1:0] op1, op2; + input logic clk, state0; + input logic reset; + input logic enable; + input logic otfzero; + input logic shiftResult; + + output logic [WIDTH:0] rem0; + output logic [WIDTH:0] Q; + output logic [WIDTH:0] Q2; + output logic [3:0] quotient; + + logic [WIDTH+3:0] Sum, Carry; + logic [WIDTH:0] Qstar; + logic [WIDTH:0] QMstar; + logic [WIDTH:0] QM2star; + logic [7:0] qtotal; + logic [WIDTH+3:0] SumN, CarryN, SumN2, CarryN2; + logic [WIDTH+3:0] divi1, divi2, divi1c, divi2c, dive1; + logic [WIDTH+3:0] mdivi_temp, mdivi; + logic zero; + logic [1:0] qsel; + logic [1:0] Qin, QMin; + logic CshiftQ, CshiftQM; + logic [WIDTH+3:0] rem1, rem2, rem3; + logic [WIDTH+3:0] SumR, CarryR; + logic [WIDTH:0] Qt; + + // Create one's complement values of Divisor (for q*D) + assign divi1 = {3'h0, op2, 1'b0}; + assign divi2 = {2'h0, op2, 2'b0}; + assign divi1c = ~divi1; + assign divi2c = ~divi2; + // Shift x1 if not mod k + mux2 #(WIDTH+4) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); + + // I I I . F F F F F ... (Robertson Criteria - \rho * qmax * D) + mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN); + mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN); + // Simplify QST + adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal); + // q = {+2, +1, -1, -2} else q = 0 + qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient); + assign ulp = quotient[2]|quotient[3]; + assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); + // Map to binary encoding + assign qsel[1] = quotient[3]|quotient[2]; + assign qsel[0] = quotient[3]|quotient[1]; + mux4 #(WIDTH+4) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); + mux2 #(WIDTH+4) mx5 (mdivi_temp, {WIDTH+4{1'b0}}, zero, mdivi); + csa #(WIDTH+4) csa1 (mdivi, SumN, {CarryN[WIDTH+3:1], ulp}, Sum, Carry); + // regs : save CSA + flopenr #(WIDTH+4) reg1 (clk, reset, enable, Sum, SumN2); + flopenr #(WIDTH+4) reg2 (clk, reset, enable, Carry, CarryN2); + // OTF + ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); + otf #(WIDTH+1) otf2 (Qin, QMin, CshiftQ, CshiftQM, + clk, otfzero, enable, Qstar, QMstar); + + // Correction and generation of Remainder + adder #(WIDTH+4) cpa2 (SumN2[WIDTH+3:0], CarryN2[WIDTH+3:0], rem1); + // Add back +D as correction + csa #(WIDTH+4) csa2 (CarryN2[WIDTH+3:0], SumN2[WIDTH+3:0], divi1, SumR, CarryR); + adder #(WIDTH+4) cpa3 (SumR, CarryR, rem2); + // Choose remainder (Rem or Rem+D) + mux2 #(WIDTH+4) mx6 (rem1, rem2, rem1[WIDTH+3], rem3); + // Choose correct Q or QM + mux2 #(WIDTH+1) mx7 (Qstar, QMstar, rem1[WIDTH+3], Qt); + // Final results + assign rem0 = rem3[WIDTH:0]; + assign Q = Qt; + +endmodule // divide4 + +module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); + + input logic [3:0] quot; + + output logic [1:0] Qin; + output logic [1:0] QMin; + output logic CshiftQ; + output logic CshiftQM; + + logic [5:0] qout; + + // q = {+2, +1, -1, -2} + always_comb + casex(quot) + 4'b0000 : qout = 6'b00_11_0_0; + 4'b0001 : qout = 6'b10_01_1_0; + 4'b0010 : qout = 6'b11_10_1_0; + 4'b0100 : qout = 6'b01_00_0_1; + 4'b1000 : qout = 6'b10_01_0_1; + default : qout = 6'bxx_xx_x_x; + endcase // case (quot) + + assign {Qin, QMin, CshiftQ, CshiftQM} = qout; + +endmodule // ls_control + +// On-the-fly Conversion per Ercegovac/Lang +module otf #(parameter WIDTH=8) + (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); + + input logic [1:0] Qin, QMin; + input logic CshiftQ, CshiftQM; + input logic clk; + input logic reset; + input logic enable; + + output logic [WIDTH-1:0] R2Q; + output logic [WIDTH-1:0] R1Q; + + logic [WIDTH-1:0] Qstar, QMstar; + logic [WIDTH-1:0] M1Q, M2Q; + + // QM + mux2 #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q); + flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q); + // Q + mux2 #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q); + flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q); + + assign Qstar = R2Q; + assign QMstar = R1Q; + +endmodule // otf + +module adder #(parameter WIDTH=8) + (input logic [WIDTH-1:0] a, b, + output logic [WIDTH-1:0] y); + + assign y = a + b; + +endmodule // adder + +module fa (input logic a, b, c, output logic sum, carry); + + assign sum = a^b^c; + assign carry = a&b|a&c|b&c; + +endmodule // fa + +module csa #(parameter WIDTH=8) + (input logic [WIDTH-1:0] a, b, c, + output logic [WIDTH-1:0] sum, carry); + + logic [WIDTH:0] carry_temp; + genvar i; + generate + for (i=0;i B. LT and GT are both '0' if A = B. + +module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule // magcompare2b + +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. +// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare8 (LT, EQ, A, B); + + input logic [7:0] A; + input logic [7:0] B; + + logic [3:0] s; + logic [3:0] t; + logic [1:0] u; + logic [1:0] v; + logic GT; + //wire LT; + + output logic EQ; + output logic LT; + + magcompare2b mag1 (s[0], t[0], A[1:0], B[1:0]); + magcompare2b mag2 (s[1], t[1], A[3:2], B[3:2]); + magcompare2b mag3 (s[2], t[2], A[5:4], B[5:4]); + magcompare2b mag4 (s[3], t[3], A[7:6], B[7:6]); + + magcompare2b mag5 (u[0], v[0], t[1:0], s[1:0]); + magcompare2b mag6 (u[1], v[1], t[3:2], s[3:2]); + + magcompare2b mag7 (LT, GT, v[1:0], u[1:0]); + + assign EQ = ~(GT | LT); + +endmodule // magcompare8 + +module exception_int #(parameter WIDTH=8) + (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf); + + input logic [WIDTH-1:0] Q; + input logic [WIDTH-1:0] rem; + input logic [WIDTH-1:0] op1; + input logic S; + input logic div0; + input logic Max_N; + input logic D_NegOne; + + output logic [WIDTH-1:0] Qf; + output logic [WIDTH-1:0] remf; + + always_comb + case ({div0, S, Max_N, D_NegOne}) + 4'b0000 : Qf = Q; + 4'b0001 : Qf = Q; + 4'b0010 : Qf = Q; + 4'b0011 : Qf = Q; + 4'b0100 : Qf = Q; + 4'b0101 : Qf = Q; + 4'b0110 : Qf = Q; + 4'b0111 : Qf = {1'b1, {WIDTH-1{1'h0}}}; + 4'b1000 : Qf = {WIDTH{1'b1}}; + 4'b1001 : Qf = {WIDTH{1'b1}}; + 4'b1010 : Qf = {WIDTH{1'b1}}; + 4'b1011 : Qf = {WIDTH{1'b1}}; + 4'b1100 : Qf = {WIDTH{1'b1}}; + 4'b1101 : Qf = {WIDTH{1'b1}}; + 4'b1110 : Qf = {WIDTH{1'b1}}; + 4'b1111 : Qf = {WIDTH{1'b1}}; + default: Qf = Q; + endcase + + always_comb + case ({div0, S, Max_N, D_NegOne}) + 4'b0000 : remf = rem; + 4'b0001 : remf = rem; + 4'b0010 : remf = rem; + 4'b0011 : remf = rem; + 4'b0100 : remf = rem; + 4'b0101 : remf = rem; + 4'b0110 : remf = rem; + 4'b0111 : remf = {WIDTH{1'h0}}; + 4'b1000 : remf = op1; + 4'b1001 : remf = op1; + 4'b1010 : remf = op1; + 4'b1011 : remf = op1; + 4'b1100 : remf = op1; + 4'b1101 : remf = op1; + 4'b1110 : remf = op1; + 4'b1111 : remf = op1; + default: remf = rem; + endcase + +endmodule // exception_int + +/* verilator lint_on COMBDLY */ +/* verilator lint_on IMPLICIT */ diff --git a/pipelined/srt/stine/iter128.do b/pipelined/srt/stine/iter128.do new file mode 100644 index 000000000..de3369ff4 --- /dev/null +++ b/pipelined/srt/stine/iter128.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter128S.do b/pipelined/srt/stine/iter128S.do new file mode 100644 index 000000000..7ddc416d7 --- /dev/null +++ b/pipelined/srt/stine/iter128S.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter128S.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter32.do b/pipelined/srt/stine/iter32.do new file mode 100755 index 000000000..02bd32512 --- /dev/null +++ b/pipelined/srt/stine/iter32.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter32S.do b/pipelined/srt/stine/iter32S.do new file mode 100644 index 000000000..52475b291 --- /dev/null +++ b/pipelined/srt/stine/iter32S.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter32S.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter64.do b/pipelined/srt/stine/iter64.do new file mode 100755 index 000000000..651c88cfd --- /dev/null +++ b/pipelined/srt/stine/iter64.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/iter64S.do b/pipelined/srt/stine/iter64S.do new file mode 100644 index 000000000..18c37a27f --- /dev/null +++ b/pipelined/srt/stine/iter64S.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv lod.sv shift.sv intdiv.sv test_iter64S.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 999586700ns +quit diff --git a/pipelined/srt/stine/lod.sv b/pipelined/srt/stine/lod.sv new file mode 100755 index 000000000..a7496757c --- /dev/null +++ b/pipelined/srt/stine/lod.sv @@ -0,0 +1,182 @@ +/////////////////////////////////////////// +// lod.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +module lod2 (P, V, B); + + input logic [1:0] B; + + output logic P; + output logic V; + + assign V = B[0] | B[1]; + assign P = B[0] & ~B[1]; + +endmodule // lo2 + +module lod_hier #(parameter WIDTH=8) + (input logic [WIDTH-1:0] B, + output logic [$clog2(WIDTH)-1:0] ZP, + output logic ZV); + + if (WIDTH == 128) + lod128 lod128 (ZP, ZV, B); + else if (WIDTH == 64) + lod64 lod64 (ZP, ZV, B); + else if (WIDTH == 32) + lod32 lod32 (ZP, ZV, B); + else if (WIDTH == 16) + lod16 lod16 (ZP, ZV, B); + else if (WIDTH == 8) + lod8 lod8 (ZP, ZV, B); + else if (WIDTH == 4) + lod4 lod4 (ZP, ZV, B); + +endmodule // lod_hier + +module lod4 (ZP, ZV, B); + + input logic [3:0] B; + + logic ZPa; + logic ZPb; + logic ZVa; + logic ZVb; + + output logic [1:0] ZP; + output logic ZV; + + lod2 l1(ZPa, ZVa, B[1:0]); + lod2 l2(ZPb, ZVb, B[3:2]); + + assign ZP[0:0] = ZVb ? ZPb : ZPa; + assign ZP[1] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lod4 + +module lod8 (ZP, ZV, B); + + input logic [7:0] B; + + logic [1:0] ZPa; + logic [1:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [2:0] ZP; + output logic ZV; + + lod4 l1(ZPa, ZVa, B[3:0]); + lod4 l2(ZPb, ZVb, B[7:4]); + + assign ZP[1:0] = ZVb ? ZPb : ZPa; + assign ZP[2] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lod8 + +module lod16 (ZP, ZV, B); + + input logic [15:0] B; + + logic [2:0] ZPa; + logic [2:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [3:0] ZP; + output logic ZV; + + lod8 l1(ZPa, ZVa, B[7:0]); + lod8 l2(ZPb, ZVb, B[15:8]); + + assign ZP[2:0] = ZVb ? ZPb : ZPa; + assign ZP[3] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lod16 + +module lod32 (ZP, ZV, B); + + input logic [31:0] B; + + logic [3:0] ZPa; + logic [3:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [4:0] ZP; + output logic ZV; + + lod16 l1(ZPa, ZVa, B[15:0]); + lod16 l2(ZPb, ZVb, B[31:16]); + + assign ZP[3:0] = ZVb ? ZPb : ZPa; + assign ZP[4] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lod32 + +module lod64 (ZP, ZV, B); + + input logic [63:0] B; + + logic [4:0] ZPa; + logic [4:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [5:0] ZP; + output logic ZV; + + lod32 l1(ZPa, ZVa, B[31:0]); + lod32 l2(ZPb, ZVb, B[63:32]); + + assign ZP[4:0] = ZVb ? ZPb : ZPa; + assign ZP[5] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lod64 + +module lod128 (ZP, ZV, B); + + input logic [127:0] B; + + logic [5:0] ZPa; + logic [5:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [6:0] ZP; + output logic ZV; + + lod64 l1(ZPa, ZVa, B[63:0]); + lod64 l2(ZPb, ZVb, B[127:64]); + + assign ZP[5:0] = ZVb ? ZPb : ZPa; + assign ZP[6] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lod128 diff --git a/pipelined/srt/stine/lzd.do b/pipelined/srt/stine/lzd.do new file mode 100755 index 000000000..9ba020b3a --- /dev/null +++ b/pipelined/srt/stine/lzd.do @@ -0,0 +1,55 @@ +# Copyright 1991-2016 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog lod.sv lzd_tb.sv + +# start and run simulation +vsim -voptargs=+acc work.stimulus + +view wave + +-- display input and output signals as hexidecimal values +# Diplays All Signals recursively +add wave -hex -r /stimulus/* + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 350 +configure wave -valuecolwidth 200 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 800ns +quit diff --git a/pipelined/srt/stine/lzd.sv b/pipelined/srt/stine/lzd.sv new file mode 100755 index 000000000..277b45931 --- /dev/null +++ b/pipelined/srt/stine/lzd.sv @@ -0,0 +1,182 @@ +/////////////////////////////////////////// +// lzd.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +module lzd2 (P, V, B); + + input logic [1:0] B; + + output logic P; + output logic V; + + assign V = ~(B[0] & B[1]); + assign P = B[1]; + +endmodule // lzd2 + +module lzd_hier #(parameter WIDTH=8) + (input logic [WIDTH-1:0] B, + output logic [$clog2(WIDTH)-1:0] ZP, + output logic ZV); + + if (WIDTH == 128) + lzd128 lzd127 (ZP, ZV, B); + else if (WIDTH == 64) + lzd64 lzd64 (ZP, ZV, B); + else if (WIDTH == 32) + lzd32 lzd32 (ZP, ZV, B); + else if (WIDTH == 16) + lzd16 lzd16 (ZP, ZV, B); + else if (WIDTH == 8) + lzd8 lzd8 (ZP, ZV, B); + else if (WIDTH == 4) + lzd4 lzd4 (ZP, ZV, B); + +endmodule // lzd_hier + +module lzd4 (ZP, ZV, B); + + input logic [3:0] B; + + logic ZPa; + logic ZPb; + logic ZVa; + logic ZVb; + + output logic [1:0] ZP; + output logic ZV; + + lzd2 l1 (ZPa, ZVa, B[1:0]); + lzd2 l2 (ZPb, ZVb, B[3:2]); + + assign ZP[0:0] = ZVb ? ZPb : ZPa; + assign ZP[1] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd4 + +module lzd8 (ZP, ZV, B); + + input logic [7:0] B; + + logic [1:0] ZPa; + logic [1:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [2:0] ZP; + output logic ZV; + + lzd4 l1 (ZPa, ZVa, B[3:0]); + lzd4 l2 (ZPb, ZVb, B[7:4]); + + assign ZP[1:0] = ZVb ? ZPb : ZPa; + assign ZP[2] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd8 + +module lzd16 (ZP, ZV, B); + + input logic [15:0] B; + + logic [2:0] ZPa; + logic [2:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [3:0] ZP; + output logic ZV; + + lzd8 l1 (ZPa, ZVa, B[7:0]); + lzd8 l2 (ZPb, ZVb, B[15:8]); + + assign ZP[2:0] = ZVb ? ZPb : ZPa; + assign ZP[3] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd16 + +module lzd32 (ZP, ZV, B); + + input logic [31:0] B; + + logic [3:0] ZPa; + logic [3:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [4:0] ZP; + output logic ZV; + + lzd16 l1 (ZPa, ZVa, B[15:0]); + lzd16 l2 (ZPb, ZVb, B[31:16]); + + assign ZP[3:0] = ZVb ? ZPb : ZPa; + assign ZP[4] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd32 + +module lzd64 (ZP, ZV, B); + + input logic [63:0] B; + + logic [4:0] ZPa; + logic [4:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [5:0] ZP; + output logic ZV; + + lzd32 l1 (ZPa, ZVa, B[31:0]); + lzd32 l2 (ZPb, ZVb, B[63:32]); + + assign ZP[4:0] = ZVb ? ZPb : ZPa; + assign ZP[5] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd64 + +module lzd128 (ZP, ZV, B); + + input logic [127:0] B; + + logic [5:0] ZPa; + logic [5:0] ZPb; + logic ZVa; + logic ZVb; + + output logic [6:0] ZP; + output logic ZV; + + lzd64 l1 (ZPa, ZVa, B[64:0]); + lzd64 l2 (ZPb, ZVb, B[127:63]); + + assign ZP[5:0] = ZVb ? ZPb : ZPa; + assign ZP[6] = ~ZVb; + assign ZV = ZVa | ZVb; + +endmodule // lzd128 diff --git a/pipelined/srt/stine/lzd_tb.sv b/pipelined/srt/stine/lzd_tb.sv new file mode 100755 index 000000000..2980d5f4d --- /dev/null +++ b/pipelined/srt/stine/lzd_tb.sv @@ -0,0 +1,59 @@ +// +// File name : tb +// Title : test +// project : HW3 +// Library : test +// Purpose : definition of modules for testbench +// notes : +// +// Copyright Oklahoma State University +// + +// Top level stimulus module + +`timescale 1ns/1ps +module stimulus; + + logic [7:0] B; + logic [2:0] ZP; + logic ZV; + + logic clk; + + integer handle3; + integer desc3; + integer i; + + // instatiate part to test + lzd_hier #(8) dut (B, ZP, ZV); + + initial + begin + clk = 1'b1; + forever #5 clk = ~clk; + end + + initial + begin + handle3 = $fopen("lzd.out"); + desc3 = handle3; + end + + initial + begin + for (i=0; i < 256; i=i+1) + begin + // Put vectors before beginning of clk + @(posedge clk) + begin + B = $random; + end + @(negedge clk) + begin + $fdisplay(desc3, "%b || %b %b", B, ZP, ZV); + end + end // for (i=0; i < 256; i=i+1) + $finish;// + end // initial begin + +endmodule // stimulus diff --git a/pipelined/srt/stine/mux.sv b/pipelined/srt/stine/mux.sv new file mode 100755 index 000000000..d13045e6d --- /dev/null +++ b/pipelined/srt/stine/mux.sv @@ -0,0 +1,51 @@ +module mux2 #(parameter WIDTH = 8) + (input logic [WIDTH-1:0] d0, d1, + input logic s, + output logic [WIDTH-1:0] y); + + assign y = s ? d1 : d0; + +endmodule // mux2 + +module mux3 #(parameter WIDTH = 8) + (input logic [WIDTH-1:0] d0, d1, d2, + input logic [1:0] s, + output logic [WIDTH-1:0] y); + + assign y = s[1] ? d2 : (s[0] ? d1 : d0); + +endmodule // mux3 + +module mux4 #(parameter WIDTH = 8) + (input logic [WIDTH-1:0] d0, d1, d2, d3, + input logic [1:0] s, + output logic [WIDTH-1:0] y); + + assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0); + +endmodule // mux4 + +module mux21x32 (Z, A, B, Sel); + + input logic [31:0] A; + input logic [31:0] B; + input logic Sel; + + output logic [31:0] Z; + + assign Z = Sel ? B : A; + +endmodule // mux21x32 + +module mux21x64 (Z, A, B, Sel); + + input logic [63:0] A; + input logic [63:0] B; + input logic Sel; + + output logic [63:0] Z; + + assign Z = Sel ? B : A; + +endmodule // mux21x64 + diff --git a/pipelined/srt/stine/notes b/pipelined/srt/stine/notes new file mode 100644 index 000000000..77311b177 --- /dev/null +++ b/pipelined/srt/stine/notes @@ -0,0 +1,30 @@ +Dividend x --(0.10101111), divisord --(0.11000101)(i -- 16(0.1100)2- 12) + +X = 175 (xAF) +D = 197 (xC5) + +X = 175/256 = 0.68359375 +D = 197/256 = 0.76953125 + +Note: Add lg(r) extra iterations due to shifting of computed q + q_{computed} = q / radix + +./srt4div 0.68359375 0.76953125 8 10 + +r=2 +X = 0.10011111 +D = 0.11000101 + +X = 159 (9F) +D = 197 (C5) + +X = 159/256 = 0.62109375 +D = 197/256 = 0.76953125 + +./srt2div 0.62109375 0.76953125 8 9 + + + + + + diff --git a/pipelined/srt/stine/otf4.in b/pipelined/srt/stine/otf4.in new file mode 100644 index 000000000..e448c1d09 --- /dev/null +++ b/pipelined/srt/stine/otf4.in @@ -0,0 +1,23 @@ +.i 4 +.o 6 +.ilb quot[3] quot[2] quot[1] quot[0] +.ob Qin[1] Qin[0] QMin[1] QMin[0] CshiftQ CshiftQM + +0000 001100 +0001 100110 +0010 111010 +0011 ------ +0100 010001 +0101 ------ +0110 ------ +0111 ------ +1000 100101 +1001 ------ +1010 ------ +1011 ------ +1100 ------ +1101 ------ +1110 ------ +1111 ------ + +.e \ No newline at end of file diff --git a/pipelined/srt/stine/pd_bad.png b/pipelined/srt/stine/pd_bad.png new file mode 100644 index 000000000..39df5cdaa Binary files /dev/null and b/pipelined/srt/stine/pd_bad.png differ diff --git a/pipelined/srt/stine/pd_cpa.png b/pipelined/srt/stine/pd_cpa.png new file mode 100644 index 000000000..d80104024 Binary files /dev/null and b/pipelined/srt/stine/pd_cpa.png differ diff --git a/pipelined/srt/stine/pd_csa.pdf b/pipelined/srt/stine/pd_csa.pdf new file mode 100644 index 000000000..00a67f88b Binary files /dev/null and b/pipelined/srt/stine/pd_csa.pdf differ diff --git a/pipelined/srt/stine/pd_csa.png b/pipelined/srt/stine/pd_csa.png new file mode 100644 index 000000000..9cf58ce48 Binary files /dev/null and b/pipelined/srt/stine/pd_csa.png differ diff --git a/pipelined/srt/stine/qslc_r4a2 b/pipelined/srt/stine/qslc_r4a2 new file mode 100755 index 000000000..6d6db4d49 Binary files /dev/null and b/pipelined/srt/stine/qslc_r4a2 differ diff --git a/pipelined/srt/stine/qslc_r4a2.c b/pipelined/srt/stine/qslc_r4a2.c new file mode 100644 index 000000000..8e68f9983 --- /dev/null +++ b/pipelined/srt/stine/qslc_r4a2.c @@ -0,0 +1,198 @@ +/* + Program: qslc_r4a2.c + Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) + User: James E. Stine + +*/ + +#include +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + printf("\tcase({D[5:3],Wmsbs})\n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf("\t\t10'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 12) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -4) + printf("0000"); + else if ((pla.tot) >= -13) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 14) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -15) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 15) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -16) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 16) + printf("1000"); + else if ((pla.tot) >= 4) + printf("0100"); + else if ((pla.tot) >= -6) + printf("0000"); + else if ((pla.tot) >= -18) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 18) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 6) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -20) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 20) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -22) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -24) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf("\tendcase\n"); + +} diff --git a/pipelined/srt/stine/run.sh b/pipelined/srt/stine/run.sh new file mode 100755 index 000000000..6dcde6c26 --- /dev/null +++ b/pipelined/srt/stine/run.sh @@ -0,0 +1,8 @@ +#!/bin/sh +vsim -do iter32S.do -c +vsim -do iter32.do -c +vsim -do iter64.do -c +vsim -do iter64S.do -c +vsim -do iter128.do -c +vsim -do iter128S.do -c + diff --git a/pipelined/srt/stine/shift.sv b/pipelined/srt/stine/shift.sv new file mode 100755 index 000000000..9738d93fe --- /dev/null +++ b/pipelined/srt/stine/shift.sv @@ -0,0 +1,73 @@ +/////////////////////////////////////////// +// shifters.sv +// +// Written: James.Stine@okstate.edu 1 February 2021 +// Modified: +// +// Purpose: Integer Divide instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +module shift_right #(parameter WIDTH=8) + (input logic [WIDTH-1:0] A, + input logic [$clog2(WIDTH)-1:0] Shift, + output logic [WIDTH-1:0] Z); + + logic [WIDTH-1:0] stage [$clog2(WIDTH):0]; + logic sign; + genvar i; + + assign stage[0] = A; + generate + for (i=0;i<$clog2(WIDTH);i=i+1) + begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], + {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, + Shift[$clog2(WIDTH)-i-1], + stage[i+1]); + end + endgenerate + assign Z = stage[$clog2(WIDTH)]; + +endmodule // shift_right + +module shift_left #(parameter WIDTH=8) + (input logic [WIDTH-1:0] A, + input logic [$clog2(WIDTH)-1:0] Shift, + output logic [WIDTH-1:0] Z); + + logic [WIDTH-1:0] stage [$clog2(WIDTH):0]; + genvar i; + + assign stage[0] = A; + generate + for (i=0;i<$clog2(WIDTH);i=i+1) + begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], + {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}}, + Shift[$clog2(WIDTH)-i-1], + stage[i+1]); + end + endgenerate + assign Z = stage[$clog2(WIDTH)]; + +endmodule // shift_left + + + + diff --git a/pipelined/srt/stine/shift_left.do b/pipelined/srt/stine/shift_left.do new file mode 100755 index 000000000..a178c3cc0 --- /dev/null +++ b/pipelined/srt/stine/shift_left.do @@ -0,0 +1,55 @@ +# Copyright 1991-2016 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv shift.sv shift_left_tb.sv + +# start and run simulation +vsim -voptargs=+acc work.stimulus + +view wave + +-- display input and output signals as hexidecimal values +# Diplays All Signals recursively +add wave -hex -r /stimulus/* + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 800ns +quit diff --git a/pipelined/srt/stine/shift_left_tb.sv b/pipelined/srt/stine/shift_left_tb.sv new file mode 100755 index 000000000..2d5d3dad8 --- /dev/null +++ b/pipelined/srt/stine/shift_left_tb.sv @@ -0,0 +1,71 @@ +// +// File name : tb +// Title : test +// project : HW3 +// Library : test +// Purpose : definition of modules for testbench +// notes : +// +// Copyright Oklahoma State University +// + +// Top level stimulus module + +`timescale 1ns/1ps + +`define XLEN 32 +module stimulus; + + logic [`XLEN-1:0] A; + logic [$clog2(`XLEN)-1:0] Shift; + logic [`XLEN-1:0] Z; + logic [`XLEN-1:0] Z_corr; + + //logic [63:0] A; + //logic [5:0] Shift; + //logic [63:0] Z; + //logic [63:0] Z_corr; + //logic [63:0] Z_orig; + + + logic clk; + + integer handle3; + integer desc3; + integer i; + + // instatiate part to test + shift_left dut1 (A, Shift, Z); + assign Z_corr = (A << Shift); + + initial + begin + clk = 1'b1; + forever #5 clk = ~clk; + end + + initial + begin + handle3 = $fopen("shift_left.out"); + desc3 = handle3; + end + + initial + begin + for (i=0; i < 256; i=i+1) + begin + // Put vectors before beginning of clk + @(posedge clk) + begin + A = $random; + Shift = $random; + end + @(negedge clk) + begin + $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr)); + end + end // for (i=0; i < 256; i=i+1) + $finish;// + end // initial begin + +endmodule // stimulus diff --git a/pipelined/srt/stine/shift_right.do b/pipelined/srt/stine/shift_right.do new file mode 100755 index 000000000..bf02e75c1 --- /dev/null +++ b/pipelined/srt/stine/shift_right.do @@ -0,0 +1,55 @@ +# Copyright 1991-2016 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog mux.sv shift.sv shift_right_tb.sv + +# start and run simulation +vsim -voptargs=+acc work.stimulus + +view wave + +-- display input and output signals as hexidecimal values +# Diplays All Signals recursively +add wave -hex -r /stimulus/* + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 800ns +quit diff --git a/pipelined/srt/stine/shift_right_tb.sv b/pipelined/srt/stine/shift_right_tb.sv new file mode 100755 index 000000000..b35277484 --- /dev/null +++ b/pipelined/srt/stine/shift_right_tb.sv @@ -0,0 +1,64 @@ +// +// File name : tb +// Title : test +// project : HW3 +// Library : test +// Purpose : definition of modules for testbench +// notes : +// +// Copyright Oklahoma State University +// + +// Top level stimulus module + +`timescale 1ns/1ps + +`define XLEN 32 +module stimulus; + + logic [`XLEN-1:0] A; + logic [$clog2(`XLEN)-1:0] Shift; + logic [`XLEN-1:0] Z; + logic [`XLEN-1:0] Z_corr; + + logic clk; + + integer handle3; + integer desc3; + integer i; + + // instatiate part to test + shift_right dut1 (A, Shift, Z); + assign Z_corr = (A >> Shift); + + initial + begin + clk = 1'b1; + forever #5 clk = ~clk; + end + + initial + begin + handle3 = $fopen("shift_right.out"); + desc3 = handle3; + #250 $finish; + end + + initial + begin + for (i=0; i < 128; i=i+1) + begin + // Put vectors before beginning of clk + @(posedge clk) + begin + A = $random; + Shift = $random; + end + @(negedge clk) + begin + $fdisplay(desc3, "%h %h || %h %h | %b", A, Shift, Z, Z_corr, (Z == Z_corr)); + end + end // @(negedge clk) + end // for (j=0; j < 32; j=j+1) + +endmodule // stimulus diff --git a/pipelined/srt/stine/shifter.sv b/pipelined/srt/stine/shifter.sv new file mode 100755 index 000000000..779a02a47 --- /dev/null +++ b/pipelined/srt/stine/shifter.sv @@ -0,0 +1,18 @@ +module shifter_right(input logic signed [63:0] a, + input logic [ 5:0] shamt, + output logic signed [63:0] y); + + + y = a >> shamt; + +endmodule // shifter_right + +module shifter_left(input logic signed [63:0] a, + input logic [ 5:0] shamt, + output logic signed [63:0] y); + + + y = a << shamt; + +endmodule // shifter_right + diff --git a/pipelined/srt/stine/srt2div b/pipelined/srt/stine/srt2div new file mode 100755 index 000000000..92734241f Binary files /dev/null and b/pipelined/srt/stine/srt2div differ diff --git a/pipelined/srt/stine/srt2div.c b/pipelined/srt/stine/srt2div.c new file mode 100755 index 000000000..9b404d94b --- /dev/null +++ b/pipelined/srt/stine/srt2div.c @@ -0,0 +1,114 @@ +#include "disp.h" + +// QSLC is for division by recuerrence for +// r=2 using a CPA - See 5.109 EL +int qst (double D, double prem) { + + int q; + + // For Debugging + printf("rw --> %lg\n", prem); + + if (prem >= 0.5) { + q = 1; + } else if (prem >= -0.5) { + q = 0; + } else { + q = -1; + } + return q; + +} + +/* + This routine performs a radix-2 SRT division + algorithm. The user inputs the numerator, the denominator, + and the number of iterations. It assumes that 0.5 <= D < 1. + +*/ + +int main(int argc, char* argv[]) { + + double P, N, D, Q, RQ, RD, RREM, scale; + int q; + int num_iter, i; + int prec; + int radix = 2; + + if (argc < 5) { + fprintf(stderr, + "Usage: %s numerator denominator num_iterations prec\n", + argv[0]); + exit(1); + } + sscanf(argv[1],"%lg", &N); + sscanf(argv[2],"%lg", &D); + sscanf(argv[3],"%d", &num_iter); + sscanf(argv[4],"%d", &prec); + // Round to precision + N = rne(N, prec); + D = rne(D, prec); + printf("N = "); + disp_bin(N, 3, prec, stdout); + printf("\n"); + printf("D = "); + disp_bin(D, 3, prec, stdout); + printf("\n"); + + Q = 0; + P = N * pow(2.0, -log2(radix)); + printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n", + N, D, N/D, num_iter); + for (scale = 1, i = 0; i < num_iter; i++) { + scale = scale * pow(2.0, -log2(radix)); + q = qst(flr(2*D, 1), 2*P); + printf("2*W[n] = "); + disp_bin(radix*P, 3, prec, stdout); + printf("\n"); + printf("q*D = "); + disp_bin(q*D, 3, prec, stdout); + printf("\n"); + printf("W[n+1] = "); + disp_bin(P ,3, prec, stdout); + printf("\n"); + // Recurrence + P = radix * P - q * D; + Q = Q + q*scale; + printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P); + printf("i = %d, q = %d", i, q); + printf(", Q = "); + disp_bin(Q, 3, prec, stdout); + printf(", W = "); + disp_bin(P, 3, prec, stdout); + printf("\n\n"); + } + if (P < 0) { + Q = Q - scale; + P = P + D; + printf("\nCorrecting Negative Remainder\n"); + printf("Q = %1.18lf, W = %1.18lf\n", Q, P); + printf("Q = "); + disp_bin(Q, 3, prec, stdout); + printf(", W = "); + disp_bin(P, 3, prec, stdout); + printf("\n"); + } + + // Output Results + RQ = N/D; + // Since q_{computed} = q / radix, multiply by radix + RD = Q * radix; + printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD); + printf("true = "); + disp_bin(RQ, 3, prec, stdout); + printf(", computed = "); + disp_bin(RD, 3, prec, stdout); + printf("\n\n"); + printf("REM = %1.18lf \n", P); + printf("REM = "); + disp_bin(P, 3, prec, stdout); + printf("\n\n"); + + return 0; + +} diff --git a/pipelined/srt/stine/srt4_pd.m b/pipelined/srt/stine/srt4_pd.m new file mode 100644 index 000000000..815c84c75 --- /dev/null +++ b/pipelined/srt/stine/srt4_pd.m @@ -0,0 +1,508 @@ +% +% PD Region for Np = 3; Nd = 4; +% w/CPA +% +% Clear all variables and screen +clear +clf +% Define the number of bits (input Dividend) +n = 4; +% +% Define Divisor Range +% Normalized Floating Point [Dmin,Dmax] = [1,2] +% Normalized Fixed Point [Dmin, Dmax] =[1/2,1] +% +Dminimum = 1.0/2; +Dmaximum = 2.0/2; +% Define an ulp +ulp = 2^(-n); +% radix = beta +beta = 4; +% rho = redundancy factor -> SHOULD ALWAYS BE >= THAN 1/2 +% +% SD representations have alpha < beta - 1 +% +% alpha = ceil(beta/2) minimially redundant +% alpha = beta -1 maximally redundant (rho = 1) +% alpha = (beta-1)/2 nonredundant +% alpha > beta - 1 over-redundant +% +rho = 2/3; +% Calculation of max digit set +alpha = rho*(beta-1); +% Da contains digit set +q = []; +for i = -alpha:alpha + q = [q; i]; +end +% 4r(i-1)/D values +hold on +% figure(1) +grid off +for i = 1:length(q) + x = -rho+q(i):ulp:rho+q(i); + % Plot redundancy (overlap) Positive + z = [rho+q(i),rho+q(i)]; + y = [x(length(x))-q(i),0]; + % Plot redundancy (overlap) Negative + if (i ~= length(q)) + w = [-rho+q(i+1)-q(i+1),0]; + u = [-rho+q(i+1),-rho+q(i+1)]; + % plot(u,w,'b') + end + % plot(x,x-q(i)) + % plot(z,y,'r') + +end +% title('Robertson Diagram for Radix-4 SRT Divison') + +Np = 3; +Nd = 4; +Dmin = Dminimum; +Dmax = Dmaximum; +ulpd = 2^(-Nd); +ulpp = 2^(-Np); + +% +% Plot Atkins P-D plot +% Normalized Floating Point [Dmin,Dmax] = [1,2] +% Normalized Fixed Point [Dmin, Dmax] =[1/2,1] +% +Dmin = Dminimum; +Dmax = Dmaximum; +for i = 1:length(q) + D = Dmin:ulp:Dmax; + P1 = (rho+q(i))*D; + P2 = (-rho+q(i))*D; + hold on + p1 = plot(D,P1); + p1.Color = '#0000ff'; + p2 = plot(D,P2); + p2.Color = '#ff0000'; + axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum]) + xticks(D) + p1.LineWidth = 2.0; + p2.LineWidth = 2.0; +end + +% Let's make x/y axis binary +j = []; +for i=1:length(D) + j = [j disp_bin(D(i), 1, 4)]; +end +yk = []; +yk2 = []; +for i=-2.5:0.5:2.5; + yk = [yk disp_bin(i, 3, 3)]; + yk2 = [yk2 i]; +end +xtickangle(90) +xticklabels(j) +yticklabels(yk) + +% Let's draw allow points on PD plot +% Positive Portions +index = 1; +i = 0:ulpp:rho*beta*Dmaximum; +for j = Dmin:ulpd:Dmax + plot(j*ones(1,length(i)),i,'k') +end + +j = Dmin:ulpd:Dmax; +for i = 0:ulpp:rho*beta*Dmaximum + plot(j,i*ones(length(j)),'k') +end + +% Negative Portions +index = 1; +i = 0:-ulpp:rho*-beta*Dmaximum; +for j = Dmin:ulpd:Dmax + plot(j*ones(1,length(i)),i,'k') +end + +j = Dmin:ulpd:Dmax; +for i = 0:-ulpp:-rho*beta*Dmaximum + plot(j,i*ones(length(j)),'k') +end + +% Labels and Printing +xlh = xlabel(['Divisor (d)']); +%xlh.FontSize = 18; +xlh.Position(2) = xlh.Position(2) - 0.1; +ylh = ylabel(['P = 4 \cdot w_i']); +ylh.Position(1) = ylh.Position(1)-0.02; +%ylh.FontSize = 18; + +% Containment Values (placed manually although not bad) +m2 = [3/4 7/8 1.0 1.0 5/4 5/4 5/4 3/2 3/2]; +m1 = [1/4 1/4 1/4 1/4 1/2 1/2 1/2 1/2 1/2]; +m0 = [-1/4 -1/4 -1/4 -1/4 -1/2 -1/2 -1/2 -1/2 -1/2]; +m1b = [-3/4 -7/8 -1 -1 -5/4 -5/4 -5/4 -3/2 -3/2]; +x2 = Dmin:ulpd:Dmax; +s2 = stairs(x2, m2); +s2.Color = '#8f08d1'; +s2.LineWidth = 3.0; +%s2.LineStyle = '--'; +s1 = stairs(x2, m1); +s1.Color = '#8f08d1'; +s1.LineWidth = 3.0; +s0 = stairs(x2, m0); +s0.Color = '#8f08d1'; +s0.LineWidth = 3.0; +s1b = stairs(x2, m1b); +s1b.Color = '#8f08d1'; +s1b.LineWidth = 3.0; + +% Place manually Quotient (ugh) +j = Dmin+ulpd/2:ulpd:Dmax; +i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum; +text(j(1), i(1), '2') +text(j(1), i(2), '2') +text(j(1), i(3), '2') +text(j(1), i(4), '2') +text(j(1), i(5), '2') +text(j(1), i(6), '2') +text(j(1), i(7), '2') +text(j(1), i(8), '2') +text(j(1), i(9), '2') +text(j(1), i(10), '2') +text(j(1), i(11), '2') +text(j(1), i(12), '2') +text(j(1), i(13), '2') +text(j(1), i(14), '2') +text(j(1), i(15), '2') +text(j(1), i(16), '1') +text(j(1), i(17), '1') +text(j(1), i(18), '1') +text(j(1), i(19), '1') +text(j(1), i(20), '0') +text(j(1), i(21), '0') +text(j(1), i(22), '0') +text(j(1), i(23), '0') +text(j(1), i(24), '-1') +text(j(1), i(25), '-1') +text(j(1), i(26), '-1') +text(j(1), i(27), '-1') +text(j(1), i(28), '-2') +text(j(1), i(29), '-2') +text(j(1), i(30), '-2') +text(j(1), i(31), '-2') +text(j(1), i(32), '-2') +text(j(1), i(33), '-2') +text(j(1), i(34), '-2') +text(j(1), i(35), '-2') +text(j(1), i(36), '-2') +text(j(1), i(37), '-2') +text(j(1), i(38), '-2') +text(j(1), i(39), '-2') +text(j(1), i(40), '-2') +text(j(1), i(41), '-2') +text(j(1), i(42), '-2') + +text(j(2), i(1), '2') +text(j(2), i(2), '2') +text(j(2), i(3), '2') +text(j(2), i(4), '2') +text(j(2), i(5), '2') +text(j(2), i(6), '2') +text(j(2), i(7), '2') +text(j(2), i(8), '2') +text(j(2), i(9), '2') +text(j(2), i(10), '2') +text(j(2), i(11), '2') +text(j(2), i(12), '2') +text(j(2), i(13), '2') +text(j(2), i(14), '2') +text(j(2), i(15), '1') +text(j(2), i(16), '1') +text(j(2), i(17), '1') +text(j(2), i(18), '1') +text(j(2), i(19), '1') +text(j(2), i(20), '0') +text(j(2), i(21), '0') +text(j(2), i(22), '0') +text(j(2), i(23), '0') +text(j(2), i(24), '-1') +text(j(2), i(25), '-1') +text(j(2), i(26), '-1') +text(j(2), i(27), '-1') +text(j(2), i(28), '-1') +text(j(2), i(29), '-2') +text(j(2), i(30), '-2') +text(j(2), i(31), '-2') +text(j(2), i(32), '-2') +text(j(2), i(33), '-2') +text(j(2), i(34), '-2') +text(j(2), i(35), '-2') +text(j(2), i(36), '-2') +text(j(2), i(37), '-2') +text(j(2), i(38), '-2') +text(j(2), i(39), '-2') +text(j(2), i(40), '-2') +text(j(2), i(41), '-2') +text(j(2), i(42), '-2') + +text(j(3), i(1), '2') +text(j(3), i(2), '2') +text(j(3), i(3), '2') +text(j(3), i(4), '2') +text(j(3), i(5), '2') +text(j(3), i(6), '2') +text(j(3), i(7), '2') +text(j(3), i(8), '2') +text(j(3), i(9), '2') +text(j(3), i(10), '2') +text(j(3), i(11), '2') +text(j(3), i(12), '2') +text(j(3), i(13), '2') +text(j(3), i(14), '1') +text(j(3), i(15), '1') +text(j(3), i(16), '1') +text(j(3), i(17), '1') +text(j(3), i(18), '1') +text(j(3), i(19), '1') +text(j(3), i(20), '0') +text(j(3), i(21), '0') +text(j(3), i(22), '0') +text(j(3), i(23), '0') +text(j(3), i(24), '-1') +text(j(3), i(25), '-1') +text(j(3), i(26), '-1') +text(j(3), i(27), '-1') +text(j(3), i(28), '-1') +text(j(3), i(29), '-1') +text(j(3), i(30), '-2') +text(j(3), i(31), '-2') +text(j(3), i(32), '-2') +text(j(3), i(33), '-2') +text(j(3), i(34), '-2') +text(j(3), i(35), '-2') +text(j(3), i(36), '-2') +text(j(3), i(37), '-2') +text(j(3), i(38), '-2') +text(j(3), i(39), '-2') +text(j(3), i(40), '-2') +text(j(3), i(41), '-2') +text(j(3), i(42), '-2') + +text(j(4), i(1), '2') +text(j(4), i(2), '2') +text(j(4), i(3), '2') +text(j(4), i(4), '2') +text(j(4), i(5), '2') +text(j(4), i(6), '2') +text(j(4), i(7), '2') +text(j(4), i(8), '2') +text(j(4), i(9), '2') +text(j(4), i(10), '2') +text(j(4), i(11), '2') +text(j(4), i(12), '2') +text(j(4), i(13), '2') +text(j(4), i(14), '1') +text(j(4), i(15), '1') +text(j(4), i(16), '1') +text(j(4), i(17), '1') +text(j(4), i(18), '1') +text(j(4), i(19), '1') +text(j(4), i(20), '0') +text(j(4), i(21), '0') +text(j(4), i(22), '0') +text(j(4), i(23), '0') +text(j(4), i(24), '-1') +text(j(4), i(25), '-1') +text(j(4), i(26), '-1') +text(j(4), i(27), '-1') +text(j(4), i(28), '-1') +text(j(4), i(29), '-1') +text(j(4), i(30), '-2') +text(j(4), i(31), '-2') +text(j(4), i(32), '-2') +text(j(4), i(33), '-2') +text(j(4), i(34), '-2') +text(j(4), i(35), '-2') +text(j(4), i(36), '-2') +text(j(4), i(37), '-2') +text(j(4), i(38), '-2') +text(j(4), i(39), '-2') +text(j(4), i(40), '-2') +text(j(4), i(41), '-2') +text(j(4), i(42), '-2') + +text(j(5), i(1), '2') +text(j(5), i(2), '2') +text(j(5), i(3), '2') +text(j(5), i(4), '2') +text(j(5), i(5), '2') +text(j(5), i(6), '2') +text(j(5), i(7), '2') +text(j(5), i(8), '2') +text(j(5), i(9), '2') +text(j(5), i(10), '2') +text(j(5), i(11), '2') +text(j(5), i(12), '1') +text(j(5), i(13), '1') +text(j(5), i(14), '1') +text(j(5), i(15), '1') +text(j(5), i(16), '1') +text(j(5), i(17), '1') +text(j(5), i(18), '0') +text(j(5), i(19), '0') +text(j(5), i(20), '0') +text(j(5), i(21), '0') +text(j(5), i(22), '0') +text(j(5), i(23), '0') +text(j(5), i(24), '0') +text(j(5), i(25), '0') +text(j(5), i(26), '-1') +text(j(5), i(27), '-1') +text(j(5), i(28), '-1') +text(j(5), i(29), '-1') +text(j(5), i(30), '-1') +text(j(5), i(31), '-1') +text(j(5), i(32), '-2') +text(j(5), i(33), '-2') +text(j(5), i(34), '-2') +text(j(5), i(35), '-2') +text(j(5), i(36), '-2') +text(j(5), i(37), '-2') +text(j(5), i(38), '-2') +text(j(5), i(39), '-2') +text(j(5), i(40), '-2') +text(j(5), i(41), '-2') +text(j(5), i(42), '-2') + +text(j(6), i(1), '2') +text(j(6), i(2), '2') +text(j(6), i(3), '2') +text(j(6), i(4), '2') +text(j(6), i(5), '2') +text(j(6), i(6), '2') +text(j(6), i(7), '2') +text(j(6), i(8), '2') +text(j(6), i(9), '2') +text(j(6), i(10), '2') +text(j(6), i(11), '2') +text(j(6), i(12), '1') +text(j(6), i(13), '1') +text(j(6), i(14), '1') +text(j(6), i(15), '1') +text(j(6), i(16), '1') +text(j(6), i(17), '1') +text(j(6), i(18), '0') +text(j(6), i(19), '0') +text(j(6), i(20), '0') +text(j(6), i(21), '0') +text(j(6), i(22), '0') +text(j(6), i(23), '0') +text(j(6), i(24), '0') +text(j(6), i(25), '0') +text(j(6), i(26), '-1') +text(j(6), i(27), '-1') +text(j(6), i(28), '-1') +text(j(6), i(29), '-1') +text(j(6), i(30), '-1') +text(j(6), i(31), '-1') +text(j(6), i(32), '-2') +text(j(6), i(33), '-2') +text(j(6), i(34), '-2') +text(j(6), i(35), '-2') +text(j(6), i(36), '-2') +text(j(6), i(37), '-2') +text(j(6), i(38), '-2') +text(j(6), i(39), '-2') +text(j(6), i(40), '-2') +text(j(6), i(41), '-2') +text(j(6), i(42), '-2') + +text(j(7), i(1), '2') +text(j(7), i(2), '2') +text(j(7), i(3), '2') +text(j(7), i(4), '2') +text(j(7), i(5), '2') +text(j(7), i(6), '2') +text(j(7), i(7), '2') +text(j(7), i(8), '2') +text(j(7), i(9), '2') +text(j(7), i(10), '2') +text(j(7), i(11), '2') +text(j(7), i(12), '1') +text(j(7), i(13), '1') +text(j(7), i(14), '1') +text(j(7), i(15), '1') +text(j(7), i(16), '1') +text(j(7), i(17), '1') +text(j(7), i(18), '0') +text(j(7), i(19), '0') +text(j(7), i(20), '0') +text(j(7), i(21), '0') +text(j(7), i(22), '0') +text(j(7), i(23), '0') +text(j(7), i(24), '0') +text(j(7), i(25), '0') +text(j(7), i(26), '-1') +text(j(7), i(27), '-1') +text(j(7), i(28), '-1') +text(j(7), i(29), '-1') +text(j(7), i(30), '-1') +text(j(7), i(31), '-1') +text(j(7), i(32), '-2') +text(j(7), i(33), '-2') +text(j(7), i(34), '-2') +text(j(7), i(35), '-2') +text(j(7), i(36), '-2') +text(j(7), i(37), '-2') +text(j(7), i(38), '-2') +text(j(7), i(39), '-2') +text(j(7), i(40), '-2') +text(j(7), i(41), '-2') +text(j(7), i(42), '-2') + +text(j(8), i(1), '2') +text(j(8), i(2), '2') +text(j(8), i(3), '2') +text(j(8), i(4), '2') +text(j(8), i(5), '2') +text(j(8), i(6), '2') +text(j(8), i(7), '2') +text(j(8), i(8), '2') +text(j(8), i(9), '2') +text(j(8), i(10), '1') +text(j(8), i(11), '1') +text(j(8), i(12), '1') +text(j(8), i(13), '1') +text(j(8), i(14), '1') +text(j(8), i(15), '1') +text(j(8), i(16), '1') +text(j(8), i(17), '1') +text(j(8), i(18), '0') +text(j(8), i(19), '0') +text(j(8), i(20), '0') +text(j(8), i(21), '0') +text(j(8), i(22), '0') +text(j(8), i(23), '0') +text(j(8), i(24), '0') +text(j(8), i(25), '0') +text(j(8), i(26), '-1') +text(j(8), i(27), '-1') +text(j(8), i(28), '-1') +text(j(8), i(29), '-1') +text(j(8), i(30), '-2') +text(j(8), i(31), '-2') +text(j(8), i(32), '-2') +text(j(8), i(33), '-2') +text(j(8), i(34), '-2') +text(j(8), i(35), '-2') +text(j(8), i(36), '-2') +text(j(8), i(37), '-2') +text(j(8), i(38), '-2') +text(j(8), i(39), '-2') +text(j(8), i(40), '-2') +text(j(8), i(41), '-2') +text(j(8), i(42), '-2') + +print -dpng pd_cpa.png + + + + + diff --git a/pipelined/srt/stine/srt4_pd2.m b/pipelined/srt/stine/srt4_pd2.m new file mode 100644 index 000000000..9c60923aa --- /dev/null +++ b/pipelined/srt/stine/srt4_pd2.m @@ -0,0 +1,333 @@ +% +% Clear all variables and screen +clear +clf +% Define the number of bits (input Dividend) +n = 4; +% +% Define Divisor Range +% Normalized Floating Point [Dmin,Dmax] = [1,2] +% Normalized Fixed Point [Dmin, Dmax] =[1/2,1] +% +Dminimum = 1.0/2; +Dmaximum = 2.0/2; +% Define an ulp +ulp = 2^(-n); +% radix = beta +beta = 4; +% rho = redundancy factor -> SHOULD ALWAYS BE >= THAN 1/2 +% +% SD representations have alpha < beta - 1 +% +% alpha = ceil(beta/2) minimially redundant +% alpha = beta -1 maximally redundant (rho = 1) +% alpha = (beta-1)/2 nonredundant +% alpha > beta - 1 over-redundant +% +rho = 2/3; +% Calculation of max digit set +alpha = rho*(beta-1); +% Da contains digit set +q = []; +for i = -alpha:alpha + q = [q; i]; +end +% 4r(i-1)/D values +hold on +% figure(1) +grid off +for i = 1:length(q) + x = -rho+q(i):ulp:rho+q(i); + % Plot redundancy (overlap) Positive + z = [rho+q(i),rho+q(i)]; + y = [x(length(x))-q(i),0]; + % Plot redundancy (overlap) Negative + if (i ~= length(q)) + w = [-rho+q(i+1)-q(i+1),0]; + u = [-rho+q(i+1),-rho+q(i+1)]; + % plot(u,w,'b') + end + % plot(x,x-q(i)) + % plot(z,y,'r') + +end +% title('Robertson Diagram for Radix-4 SRT Divison') + +Np = 3; +Nd = 3; +Dmin = Dminimum; +Dmax = Dmaximum; +ulpd = 2^(-Nd); +ulpp = 2^(-Np); + +% +% Plot Atkins P-D plot +% Normalized Floating Point [Dmin,Dmax] = [1,2] +% Normalized Fixed Point [Dmin, Dmax] =[1/2,1] +% +Dmin = Dminimum; +Dmax = Dmaximum; +for i = 1:length(q) + D = Dmin:ulpd:Dmax; + P1 = (rho+q(i))*D; + P2 = (-rho+q(i))*D; + hold on + p1 = plot(D,P1,'b'); + p2 = plot(D,P2,'r'); + axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum]) + xticks(D) + p1.LineWidth = 2.0; + p2.LineWidth = 2.0; +end + +% Let's make x axis binary +D = Dmin:ulpd:Dmax; +j = []; +for i=1:length(D) + j = [j disp_bin(D(i), 1, 3)]; +end +yk = []; +yk2 = []; +for i=-2.5:0.5:2.5; + yk = [yk disp_bin(i, 3, 3)]; + yk2 = [yk2 i]; +end +xtickangle(90) +xticklabels(j) +yticklabels(yk) + +% Let's draw allow points on PD plot +% Positive Portions +index = 1; +i = 0:ulpp:rho*beta*Dmaximum; +for j = Dmin:ulpd:Dmax + plot(j*ones(1,length(i)),i,'k'); +end + +j = Dmin:ulpd:Dmax; +for i = 0:ulpp:rho*beta*Dmaximum + plot(j,i*ones(length(j)),'k'); +end + +% Negative Portions +index = 1; +i = 0:-ulpp:rho*-beta*Dmaximum; +for j = Dmin:ulpd:Dmax + plot(j*ones(1,length(i)),i,'k'); +end + +j = Dmin:ulpd:Dmax; +for i = 0:-ulpp:-rho*beta*Dmaximum + plot(j,i*ones(length(j)),'k'); +end + +% Labels and Printing +xlh = xlabel(['Divisor (d)']); +xlh.Position(2) = xlh.Position(2) - 0.1; +xlh.FontSize = 18; +ylh = ylabel(['P = 4 \cdot w_i']); +ylh.Position(1) = ylh.Position(1)-0.02; +ylh.FontSize = 18; + +% Containment Values (placed manually although not bad) +m2 = [5/6 1.0 5/4 11/8 11/8]; +m1 = [1/4 1/4 1/2 1/2 1/2]; +m0 = [-1/4 -1/4 -1/2 -1/2 -1/2]; +m1b = [-5/6 -1 -5/4 -11/8 -11/8]; +x2 = Dmin:ulpd:Dmax; +s2 = stairs(x2, m2); +s2.Color = '#8f08d1'; +s2.LineWidth = 3.0; +s1 = stairs(x2, m1); +s1.Color = '#8f08d1'; +s1.LineWidth = 3.0; +s0 = stairs(x2, m0); +s0.Color = '#8f08d1'; +s0.LineWidth = 3.0; +s1b = stairs(x2, m1b); +s1b.Color = '#8f08d1'; +s1b.LineWidth = 3.0; + +% Place manually Quotient (ugh) +j = Dmin+ulpd/2:ulpd:Dmax; +i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum; +text(j(1), i(1), '2') +text(j(1), i(2), '2') +text(j(1), i(3), '2') +text(j(1), i(4), '2') +text(j(1), i(5), '2') +text(j(1), i(6), '2') +text(j(1), i(7), '2') +text(j(1), i(8), '2') +text(j(1), i(9), '2') +text(j(1), i(10), '2') +text(j(1), i(11), '2') +text(j(1), i(12), '2') +text(j(1), i(13), '2') +text(j(1), i(14), '2') +error1 = text(j(1), i(15), 'Full Precision', 'FontSize', 16); +text(j(1), i(16), '1') +text(j(1), i(17), '1') +text(j(1), i(18), '1') +text(j(1), i(19), '1') +text(j(1), i(20), '0') +text(j(1), i(21), '0') +text(j(1), i(22), '0') +text(j(1), i(23), '0') +text(j(1), i(24), '-1') +text(j(1), i(25), '-1') +text(j(1), i(26), '-1') +text(j(1), i(27), '-1') +error2 = text(j(1), i(28), 'Full Precision', 'FontSize', 16); +text(j(1), i(29), '-2') +text(j(1), i(30), '-2') +text(j(1), i(31), '-2') +text(j(1), i(32), '-2') +text(j(1), i(33), '-2') +text(j(1), i(34), '-2') +text(j(1), i(35), '-2') +text(j(1), i(36), '-2') +text(j(1), i(37), '-2') +text(j(1), i(38), '-2') +text(j(1), i(39), '-2') +text(j(1), i(40), '-2') +text(j(1), i(41), '-2') +text(j(1), i(42), '-2') + +text(j(2), i(1), '2') +text(j(2), i(2), '2') +text(j(2), i(3), '2') +text(j(2), i(4), '2') +text(j(2), i(5), '2') +text(j(2), i(6), '2') +text(j(2), i(7), '2') +text(j(2), i(8), '2') +text(j(2), i(9), '2') +text(j(2), i(10), '2') +text(j(2), i(11), '2') +text(j(2), i(12), '2') +text(j(2), i(13), '2') +text(j(2), i(14), '1') +text(j(2), i(15), '1') +text(j(2), i(16), '1') +text(j(2), i(17), '1') +text(j(2), i(18), '1') +text(j(2), i(19), '1') +text(j(2), i(20), '0') +text(j(2), i(21), '0') +text(j(2), i(22), '0') +text(j(2), i(23), '0') +text(j(2), i(24), '-1') +text(j(2), i(25), '-1') +text(j(2), i(26), '-1') +text(j(2), i(27), '-1') +text(j(2), i(28), '-1') +text(j(2), i(29), '-1') +text(j(2), i(30), '-2') +text(j(2), i(31), '-2') +text(j(2), i(32), '-2') +text(j(2), i(33), '-2') +text(j(2), i(34), '-2') +text(j(2), i(35), '-2') +text(j(2), i(36), '-2') +text(j(2), i(37), '-2') +text(j(2), i(38), '-2') +text(j(2), i(39), '-2') +text(j(2), i(40), '-2') +text(j(2), i(41), '-2') +text(j(2), i(42), '-2') + +text(j(3), i(1), '2') +text(j(3), i(2), '2') +text(j(3), i(3), '2') +text(j(3), i(4), '2') +text(j(3), i(5), '2') +text(j(3), i(6), '2') +text(j(3), i(7), '2') +text(j(3), i(8), '2') +text(j(3), i(9), '2') +text(j(3), i(10), '2') +text(j(3), i(11), '2') +text(j(3), i(12), '1') +text(j(3), i(13), '1') +text(j(3), i(14), '1') +text(j(3), i(15), '1') +text(j(3), i(16), '1') +text(j(3), i(17), '1') +text(j(3), i(18), '0') +text(j(3), i(19), '0') +text(j(3), i(20), '0') +text(j(3), i(21), '0') +text(j(3), i(22), '0') +text(j(3), i(23), '0') +text(j(3), i(24), '0') +text(j(3), i(25), '0') +text(j(3), i(26), '-1') +text(j(3), i(27), '-1') +text(j(3), i(28), '-1') +text(j(3), i(29), '-1') +text(j(3), i(30), '-1') +text(j(3), i(31), '-1') +text(j(3), i(32), '-2') +text(j(3), i(33), '-2') +text(j(3), i(34), '-2') +text(j(3), i(35), '-2') +text(j(3), i(36), '-2') +text(j(3), i(37), '-2') +text(j(3), i(38), '-2') +text(j(3), i(39), '-2') +text(j(3), i(40), '-2') +text(j(3), i(41), '-2') +text(j(3), i(42), '-2') + +text(j(4), i(1), '2') +text(j(4), i(2), '2') +text(j(4), i(3), '2') +text(j(4), i(4), '2') +text(j(4), i(5), '2') +text(j(4), i(6), '2') +text(j(4), i(7), '2') +text(j(4), i(8), '2') +text(j(4), i(9), '2') +text(j(4), i(10), '2') +text(j(4), i(11), '1') +text(j(4), i(12), '1') +text(j(4), i(13), '1') +text(j(4), i(14), '1') +text(j(4), i(15), '1') +text(j(4), i(16), '1') +text(j(4), i(17), '1') +text(j(4), i(18), '0') +text(j(4), i(19), '0') +text(j(4), i(20), '0') +text(j(4), i(21), '0') +text(j(4), i(22), '0') +text(j(4), i(23), '0') +text(j(4), i(24), '0') +text(j(4), i(25), '0') +text(j(4), i(26), '-1') +text(j(4), i(27), '-1') +text(j(4), i(28), '-1') +text(j(4), i(29), '-1') +text(j(4), i(30), '-1') +text(j(4), i(31), '-1') +text(j(4), i(32), '-1') +text(j(4), i(33), '-2') +text(j(4), i(34), '-2') +text(j(4), i(35), '-2') +text(j(4), i(36), '-2') +text(j(4), i(37), '-2') +text(j(4), i(38), '-2') +text(j(4), i(39), '-2') +text(j(4), i(40), '-2') +text(j(4), i(41), '-2') +text(j(4), i(42), '-2') + + + +print -dpng pd_bad.png + + + + + diff --git a/pipelined/srt/stine/srt4_pd3.m b/pipelined/srt/stine/srt4_pd3.m new file mode 100644 index 000000000..695b3c7f6 --- /dev/null +++ b/pipelined/srt/stine/srt4_pd3.m @@ -0,0 +1,855 @@ +% +% Clear all variables and screen +clear +clf +% Define the number of bits (input Dividend) +n = 4; +% +% Define Divisor Range +% Normalized Floating Point [Dmin,Dmax] = [1,2] +% Normalized Fixed Point [Dmin, Dmax] =[1/2,1] +% +Dminimum = 1.0/2; +Dmaximum = 2.0/2; +% Define an ulp +ulp = 2^(-n); +% radix = beta +beta = 4; +% rho = redundancy factor -> SHOULD ALWAYS BE >= THAN 1/2 +% +% SD representations have alpha < beta - 1 +% +% alpha = ceil(beta/2) minimially redundant +% alpha = beta -1 maximally redundant (rho = 1) +% alpha = (beta-1)/2 nonredundant +% alpha > beta - 1 over-redundant +% +rho = 2/3; +% Calculation of max digit set +alpha = rho*(beta-1); +% Da contains digit set +q = []; +for i = -alpha:alpha + q = [q; i]; +end +% 4r(i-1)/D values +hold on +% figure(1) +grid off +for i = 1:length(q) + x = -rho+q(i):ulp:rho+q(i); + % Plot redundancy (overlap) Positive + z = [rho+q(i),rho+q(i)]; + y = [x(length(x))-q(i),0]; + % Plot redundancy (overlap) Negative + if (i ~= length(q)) + w = [-rho+q(i+1)-q(i+1),0]; + u = [-rho+q(i+1),-rho+q(i+1)]; + % plot(u,w,'b') + end + % plot(x,x-q(i)) + % plot(z,y,'r') + +end +% title('Robertson Diagram for Radix-4 SRT Divison') + +% +% Plot Atkins P-D plot +% Normalized Floating Point [Dmin,Dmax] = [1,2] +% Normalized Fixed Point [Dmin, Dmax] =[1/2,1] +% +Dmin = Dminimum; +Dmax = Dmaximum; +for i = 1:length(q) + D = Dmin:ulp:Dmax; + P1 = (rho+q(i))*D; + P2 = (-rho+q(i))*D; + hold on + p1 = plot(D,P1,'b'); + p2 = plot(D,P2,'r'); + axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum]) + xticks(D) + p1.LineWidth = 2.0; + p2.LineWidth = 2.0; +end + +% Let's make x axis binary +j = []; +for i=1:length(D) + j = [j disp_bin(D(i), 1, 4)]; +end +yk = []; +yk2 = []; +for i=-2.5:0.5:2.5; + yk = [yk disp_bin(i, 3, 4)]; + yk2 = [yk2 i]; +end +xtickangle(90) +xticklabels(j) +yticklabels(yk) + +Np = 4; +Nd = 4; +Dmin = Dminimum; +Dmax = Dmaximum; +ulpd = 2^(-Nd); +ulpp = 2^(-Np); + +% Let's draw allow points on PD plot +% Positive Portions +index = 1; +i = 0:ulpp:rho*beta*Dmaximum; +for j = Dmin:ulpd:Dmax + plot(j*ones(1,length(i)),i,'k'); +end + +j = Dmin:ulpd:Dmax; +for i = 0:ulpp:rho*beta*Dmaximum + plot(j,i*ones(length(j)),'k'); +end + +% Negative Portions +index = 1; +i = 0:-ulpp:rho*-beta*Dmaximum; +for j = Dmin:ulpd:Dmax + plot(j*ones(1,length(i)),i,'k'); +end + +j = Dmin:ulpd:Dmax; +for i = 0:-ulpp:-rho*beta*Dmaximum + plot(j,i*ones(length(j)),'k'); +end + +% Labels and Printing +xlh = xlabel(['Divisor (d)']); +xlh.Position(2) = xlh.Position(2) - 0.1; +%xlh.FontSize = 18; +ylh = ylabel(['P = 4 \cdot w_i']); +ylh.Position(1) = ylh.Position(1)-0.02; +%ylh.FontSize = 18; + +% Containment Values (placed manually although not bad) +m2 = [3/4 7/8 15/16 1.0 9/8 19/16 5/4 6/4 6/4]; +m1 = [1/4 1/4 1/4 1/4 3/8 3/8 1/2 1/2 1/2]; +m0 = [-1/4 -3/8 -3/8 -3/8 -1/2 -1/2 -1/2 -1/2 -1/2]; +m1b = [-13/16 -15/16 -1 -9/8 -5/4 -5/4 -11/8 -6/4 -6/4]; +x2 = Dmin:ulpd:Dmax; +s2 = stairs(x2, m2); +s2.Color = '#8f08d1'; +s2.LineWidth = 3.0; +s1 = stairs(x2, m1); +s1.Color = '#8f08d1'; +s1.LineWidth = 3.0; +s0 = stairs(x2, m0); +s0.Color = '#8f08d1'; +s0.LineWidth = 3.0; +s1b = stairs(x2, m1b); +s1b.Color = '#8f08d1'; +s1b.LineWidth = 3.0; + +% Place manually Quotient (ugh) +j = Dmin+ulpd/2:ulpd:Dmax; +i = rho*beta*Dmaximum-ulpp:-ulpp:-rho*beta*Dmaximum; + +% 1 +text(j(1), i(1), '2') +text(j(1), i(2), '2') +text(j(1), i(3), '2') +text(j(1), i(4), '2') +text(j(1), i(5), '2') +text(j(1), i(6), '2') +text(j(1), i(7), '2') +text(j(1), i(8), '2') +text(j(1), i(9), '2') +text(j(1), i(10), '2') +text(j(1), i(11), '2') +text(j(1), i(12), '2') +text(j(1), i(13), '2') +text(j(1), i(14), '2') +text(j(1), i(15), '2') +text(j(1), i(16), '2') +text(j(1), i(17), '2') +text(j(1), i(18), '2') +text(j(1), i(19), '2') +text(j(1), i(20), '2') +text(j(1), i(21), '2') +text(j(1), i(22), '2') +text(j(1), i(23), '2') +text(j(1), i(24), '2') +text(j(1), i(25), '2') +text(j(1), i(26), '2') +text(j(1), i(27), '2') +text(j(1), i(28), '2') +text(j(1), i(29), '2') +text(j(1), i(30), '2') +text(j(1), i(31), '1') +text(j(1), i(32), '1') +text(j(1), i(33), '1') +text(j(1), i(34), '1') +text(j(1), i(35), '1') +text(j(1), i(36), '1') +text(j(1), i(37), '1') +text(j(1), i(38), '1') +text(j(1), i(39), '0') +text(j(1), i(40), '0') +text(j(1), i(41), '0') +text(j(1), i(42), '0') + +text(j(1), i(43), '0') +text(j(1), i(44), '0') +text(j(1), i(45), '0') +text(j(1), i(46), '0') +text(j(1), i(47), '-1') +text(j(1), i(48), '-1') +text(j(1), i(49), '-1') +text(j(1), i(50), '-1') +text(j(1), i(51), '-1') +text(j(1), i(52), '-1') +text(j(1), i(53), '-1') +text(j(1), i(54), '-1') +text(j(1), i(55), '-1') +text(j(1), i(56), '-2') +text(j(1), i(57), '-2') +text(j(1), i(58), '-2') +text(j(1), i(59), '-2') +text(j(1), i(60), '-2') +text(j(1), i(61), '-2') +text(j(1), i(62), '-2') +text(j(1), i(63), '-2') +text(j(1), i(64), '-2') +text(j(1), i(65), '-2') +text(j(1), i(66), '-2') +text(j(1), i(67), '-2') +text(j(1), i(68), '-2') +text(j(1), i(69), '-2') +text(j(1), i(70), '-2') +text(j(1), i(71), '-2') +text(j(1), i(72), '-2') +text(j(1), i(73), '-2') +text(j(1), i(74), '-2') +text(j(1), i(75), '-2') +text(j(1), i(76), '-2') +text(j(1), i(77), '-2') +text(j(1), i(78), '-2') +text(j(1), i(79), '-2') +text(j(1), i(80), '-2') +text(j(1), i(81), '-2') +text(j(1), i(82), '-2') +text(j(1), i(83), '-2') +text(j(1), i(84), '-2') + +text(j(2), i(1), '2') +text(j(2), i(2), '2') +text(j(2), i(3), '2') +text(j(2), i(4), '2') +text(j(2), i(5), '2') +text(j(2), i(6), '2') +text(j(2), i(7), '2') +text(j(2), i(8), '2') +text(j(2), i(9), '2') +text(j(2), i(10), '2') +text(j(2), i(11), '2') +text(j(2), i(12), '2') +text(j(2), i(13), '2') +text(j(2), i(14), '2') +text(j(2), i(15), '2') +text(j(2), i(16), '2') +text(j(2), i(17), '2') +text(j(2), i(18), '2') +text(j(2), i(19), '2') +text(j(2), i(20), '2') +text(j(2), i(21), '2') +text(j(2), i(22), '2') +text(j(2), i(23), '2') +text(j(2), i(24), '2') +text(j(2), i(25), '2') +text(j(2), i(26), '2') +text(j(2), i(27), '2') +text(j(2), i(28), '2') +text(j(2), i(29), '1') +text(j(2), i(30), '1') +text(j(2), i(31), '1') +text(j(2), i(32), '1') +text(j(2), i(33), '1') +text(j(2), i(34), '1') +text(j(2), i(35), '1') +text(j(2), i(36), '1') +text(j(2), i(37), '1') +text(j(2), i(38), '1') +text(j(2), i(39), '0') +text(j(2), i(40), '0') +text(j(2), i(41), '0') +text(j(2), i(42), '0') + +text(j(2), i(43), '0') +text(j(2), i(44), '0') +text(j(2), i(45), '0') +text(j(2), i(46), '0') +text(j(2), i(47), '0') +text(j(2), i(48), '0') +text(j(2), i(49), '-1') +text(j(2), i(50), '-1') +text(j(2), i(51), '-1') +text(j(2), i(52), '-1') +text(j(2), i(53), '-1') +text(j(2), i(54), '-1') +text(j(2), i(55), '-1') +text(j(2), i(56), '-1') +text(j(2), i(57), '-1') +text(j(2), i(58), '-2') +text(j(2), i(59), '-2') +text(j(2), i(60), '-2') +text(j(2), i(61), '-2') +text(j(2), i(62), '-2') +text(j(2), i(63), '-2') +text(j(2), i(64), '-2') +text(j(2), i(65), '-2') +text(j(2), i(66), '-2') +text(j(2), i(67), '-2') +text(j(2), i(68), '-2') +text(j(2), i(69), '-2') +text(j(2), i(70), '-2') +text(j(2), i(71), '-2') +text(j(2), i(72), '-2') +text(j(2), i(73), '-2') +text(j(2), i(74), '-2') +text(j(2), i(75), '-2') +text(j(2), i(76), '-2') +text(j(2), i(77), '-2') +text(j(2), i(78), '-2') +text(j(2), i(79), '-2') +text(j(2), i(80), '-2') +text(j(2), i(81), '-2') +text(j(2), i(82), '-2') +text(j(2), i(83), '-2') +text(j(2), i(84), '-2') + +% 3 +text(j(3), i(1), '2') +text(j(3), i(2), '2') +text(j(3), i(3), '2') +text(j(3), i(4), '2') +text(j(3), i(5), '2') +text(j(3), i(6), '2') +text(j(3), i(7), '2') +text(j(3), i(8), '2') +text(j(3), i(9), '2') +text(j(3), i(10), '2') +text(j(3), i(11), '2') +text(j(3), i(12), '2') +text(j(3), i(13), '2') +text(j(3), i(14), '2') +text(j(3), i(15), '2') +text(j(3), i(16), '2') +text(j(3), i(17), '2') +text(j(3), i(18), '2') +text(j(3), i(19), '2') +text(j(3), i(20), '2') +text(j(3), i(21), '2') +text(j(3), i(22), '2') +text(j(3), i(23), '2') +text(j(3), i(24), '2') +text(j(3), i(25), '2') +text(j(3), i(26), '2') +text(j(3), i(27), '2') +text(j(3), i(28), '1') +text(j(3), i(29), '1') +text(j(3), i(30), '1') +text(j(3), i(31), '1') +text(j(3), i(32), '1') +text(j(3), i(33), '1') +text(j(3), i(34), '1') +text(j(3), i(35), '1') +text(j(3), i(36), '1') +text(j(3), i(37), '1') +text(j(3), i(38), '1') +text(j(3), i(39), '0') +text(j(3), i(40), '0') +text(j(3), i(41), '0') +text(j(3), i(42), '0') + +text(j(3), i(43), '0') +text(j(3), i(44), '0') +text(j(3), i(45), '0') +text(j(3), i(46), '0') +text(j(3), i(47), '0') +text(j(3), i(48), '0') +text(j(3), i(49), '-1') +text(j(3), i(50), '-1') +text(j(3), i(51), '-1') +text(j(3), i(52), '-1') +text(j(3), i(53), '-1') +text(j(3), i(54), '-1') +text(j(3), i(55), '-1') +text(j(3), i(56), '-1') +text(j(3), i(57), '-1') +text(j(3), i(58), '-1') +text(j(3), i(59), '-2') +text(j(3), i(60), '-2') +text(j(3), i(61), '-2') +text(j(3), i(62), '-2') +text(j(3), i(63), '-2') +text(j(3), i(64), '-2') +text(j(3), i(65), '-2') +text(j(3), i(66), '-2') +text(j(3), i(67), '-2') +text(j(3), i(68), '-2') +text(j(3), i(69), '-2') +text(j(3), i(70), '-2') +text(j(3), i(71), '-2') +text(j(3), i(72), '-2') +text(j(3), i(73), '-2') +text(j(3), i(74), '-2') +text(j(3), i(75), '-2') +text(j(3), i(76), '-2') +text(j(3), i(77), '-2') +text(j(3), i(78), '-2') +text(j(3), i(79), '-2') +text(j(3), i(80), '-2') +text(j(3), i(81), '-2') +text(j(3), i(82), '-2') +text(j(3), i(83), '-2') +text(j(3), i(84), '-2') + +% 4 +text(j(4), i(1), '2') +text(j(4), i(2), '2') +text(j(4), i(3), '2') +text(j(4), i(4), '2') +text(j(4), i(5), '2') +text(j(4), i(6), '2') +text(j(4), i(7), '2') +text(j(4), i(8), '2') +text(j(4), i(9), '2') +text(j(4), i(10), '2') +text(j(4), i(11), '2') +text(j(4), i(12), '2') +text(j(4), i(13), '2') +text(j(4), i(14), '2') +text(j(4), i(15), '2') +text(j(4), i(16), '2') +text(j(4), i(17), '2') +text(j(4), i(18), '2') +text(j(4), i(19), '2') +text(j(4), i(20), '2') +text(j(4), i(21), '2') +text(j(4), i(22), '2') +text(j(4), i(23), '2') +text(j(4), i(24), '2') +text(j(4), i(25), '2') +text(j(4), i(26), '2') +text(j(4), i(27), '1') +text(j(4), i(28), '1') +text(j(4), i(29), '1') +text(j(4), i(30), '1') +text(j(4), i(31), '1') +text(j(4), i(32), '1') +text(j(4), i(33), '1') +text(j(4), i(34), '1') +text(j(4), i(35), '1') +text(j(4), i(36), '1') +text(j(4), i(37), '1') +text(j(4), i(38), '1') +text(j(4), i(39), '0') +text(j(4), i(40), '0') +text(j(4), i(41), '0') +text(j(4), i(42), '0') + +text(j(4), i(43), '0') +text(j(4), i(44), '0') +text(j(4), i(45), '0') +text(j(4), i(46), '0') +text(j(4), i(47), '0') +text(j(4), i(48), '0') +text(j(4), i(49), '-1') +text(j(4), i(50), '-1') +text(j(4), i(51), '-1') +text(j(4), i(52), '-1') +text(j(4), i(53), '-1') +text(j(4), i(54), '-1') +text(j(4), i(55), '-1') +text(j(4), i(56), '-1') +text(j(4), i(57), '-1') +text(j(4), i(58), '-1') +text(j(4), i(59), '-1') +text(j(4), i(60), '-1') +text(j(4), i(61), '-2') +text(j(4), i(62), '-2') +text(j(4), i(63), '-2') +text(j(4), i(64), '-2') +text(j(4), i(65), '-2') +text(j(4), i(66), '-2') +text(j(4), i(67), '-2') +text(j(4), i(68), '-2') +text(j(4), i(69), '-2') +text(j(4), i(70), '-2') +text(j(4), i(71), '-2') +text(j(4), i(72), '-2') +text(j(4), i(73), '-2') +text(j(4), i(74), '-2') +text(j(4), i(75), '-2') +text(j(4), i(76), '-2') +text(j(4), i(77), '-2') +text(j(4), i(78), '-2') +text(j(4), i(79), '-2') +text(j(4), i(80), '-2') +text(j(4), i(81), '-2') +text(j(4), i(82), '-2') +text(j(4), i(83), '-2') +text(j(4), i(84), '-2') + +% 5 +text(j(5), i(1), '2') +text(j(5), i(2), '2') +text(j(5), i(3), '2') +text(j(5), i(4), '2') +text(j(5), i(5), '2') +text(j(5), i(6), '2') +text(j(5), i(7), '2') +text(j(5), i(8), '2') +text(j(5), i(9), '2') +text(j(5), i(10), '2') +text(j(5), i(11), '2') +text(j(5), i(12), '2') +text(j(5), i(13), '2') +text(j(5), i(14), '2') +text(j(5), i(15), '2') +text(j(5), i(16), '2') +text(j(5), i(17), '2') +text(j(5), i(18), '2') +text(j(5), i(19), '2') +text(j(5), i(20), '2') +text(j(5), i(21), '2') +text(j(5), i(22), '2') +text(j(5), i(23), '2') +text(j(5), i(24), '2') +text(j(5), i(25), '1') +text(j(5), i(26), '1') +text(j(5), i(27), '1') +text(j(5), i(28), '1') +text(j(5), i(29), '1') +text(j(5), i(30), '1') +text(j(5), i(31), '1') +text(j(5), i(32), '1') +text(j(5), i(33), '1') +text(j(5), i(34), '1') +text(j(5), i(35), '1') +text(j(5), i(36), '1') +text(j(5), i(37), '0') +text(j(5), i(38), '0') +text(j(5), i(39), '0') +text(j(5), i(40), '0') +text(j(5), i(41), '0') +text(j(5), i(42), '0') + +text(j(5), i(43), '0') +text(j(5), i(44), '0') +text(j(5), i(45), '0') +text(j(5), i(46), '0') +text(j(5), i(47), '0') +text(j(5), i(48), '0') +text(j(5), i(49), '0') +text(j(5), i(50), '0') +text(j(5), i(51), '-1') +text(j(5), i(52), '-1') +text(j(5), i(53), '-1') +text(j(5), i(54), '-1') +text(j(5), i(55), '-1') +text(j(5), i(56), '-1') +text(j(5), i(57), '-1') +text(j(5), i(58), '-1') +text(j(5), i(59), '-1') +text(j(5), i(60), '-1') +text(j(5), i(61), '-1') +text(j(5), i(62), '-1') +text(j(5), i(63), '-2') +text(j(5), i(64), '-2') +text(j(5), i(65), '-2') +text(j(5), i(66), '-2') +text(j(5), i(67), '-2') +text(j(5), i(68), '-2') +text(j(5), i(69), '-2') +text(j(5), i(70), '-2') +text(j(5), i(71), '-2') +text(j(5), i(72), '-2') +text(j(5), i(73), '-2') +text(j(5), i(74), '-2') +text(j(5), i(75), '-2') +text(j(5), i(76), '-2') +text(j(5), i(77), '-2') +text(j(5), i(78), '-2') +text(j(5), i(79), '-2') +text(j(5), i(80), '-2') +text(j(5), i(81), '-2') +text(j(5), i(82), '-2') +text(j(5), i(83), '-2') +text(j(5), i(84), '-2') + +% 6 +text(j(6), i(1), '2') +text(j(6), i(2), '2') +text(j(6), i(3), '2') +text(j(6), i(4), '2') +text(j(6), i(5), '2') +text(j(6), i(6), '2') +text(j(6), i(7), '2') +text(j(6), i(8), '2') +text(j(6), i(9), '2') +text(j(6), i(10), '2') +text(j(6), i(11), '2') +text(j(6), i(12), '2') +text(j(6), i(13), '2') +text(j(6), i(14), '2') +text(j(6), i(15), '2') +text(j(6), i(16), '2') +text(j(6), i(17), '2') +text(j(6), i(18), '2') +text(j(6), i(19), '2') +text(j(6), i(20), '2') +text(j(6), i(21), '2') +text(j(6), i(22), '2') +text(j(6), i(23), '2') +text(j(6), i(24), '1') +text(j(6), i(25), '1') +text(j(6), i(26), '1') +text(j(6), i(27), '1') +text(j(6), i(28), '1') +text(j(6), i(29), '1') +text(j(6), i(30), '1') +text(j(6), i(31), '1') +text(j(6), i(32), '1') +text(j(6), i(33), '1') +text(j(6), i(34), '1') +text(j(6), i(35), '1') +text(j(6), i(36), '1') +text(j(6), i(37), '0') +text(j(6), i(38), '0') +text(j(6), i(39), '0') +text(j(6), i(40), '0') +text(j(6), i(41), '0') +text(j(6), i(42), '0') + +text(j(6), i(43), '0') +text(j(6), i(44), '0') +text(j(6), i(45), '0') +text(j(6), i(46), '0') +text(j(6), i(47), '0') +text(j(6), i(48), '0') +text(j(6), i(49), '0') +text(j(6), i(50), '0') +text(j(6), i(51), '-1') +text(j(6), i(52), '-1') +text(j(6), i(53), '-1') +text(j(6), i(54), '-1') +text(j(6), i(55), '-1') +text(j(6), i(56), '-1') +text(j(6), i(57), '-1') +text(j(6), i(58), '-1') +text(j(6), i(59), '-1') +text(j(6), i(60), '-1') +text(j(6), i(61), '-1') +text(j(6), i(62), '-1') +text(j(6), i(63), '-2') +text(j(6), i(64), '-2') +text(j(6), i(65), '-2') +text(j(6), i(66), '-2') +text(j(6), i(67), '-2') +text(j(6), i(68), '-2') +text(j(6), i(69), '-2') +text(j(6), i(70), '-2') +text(j(6), i(71), '-2') +text(j(6), i(72), '-2') +text(j(6), i(73), '-2') +text(j(6), i(74), '-2') +text(j(6), i(75), '-2') +text(j(6), i(76), '-2') +text(j(6), i(77), '-2') +text(j(6), i(78), '-2') +text(j(6), i(79), '-2') +text(j(6), i(80), '-2') +text(j(6), i(81), '-2') +text(j(6), i(82), '-2') +text(j(6), i(83), '-2') +text(j(6), i(84), '-2') + +% 7 +text(j(7), i(1), '2') +text(j(7), i(2), '2') +text(j(7), i(3), '2') +text(j(7), i(4), '2') +text(j(7), i(5), '2') +text(j(7), i(6), '2') +text(j(7), i(7), '2') +text(j(7), i(8), '2') +text(j(7), i(9), '2') +text(j(7), i(10), '2') +text(j(7), i(11), '2') +text(j(7), i(12), '2') +text(j(7), i(13), '2') +text(j(7), i(14), '2') +text(j(7), i(15), '2') +text(j(7), i(16), '2') +text(j(7), i(17), '2') +text(j(7), i(18), '2') +text(j(7), i(19), '2') +text(j(7), i(20), '2') +text(j(7), i(21), '2') +text(j(7), i(22), '2') +text(j(7), i(23), '1') +text(j(7), i(24), '1') +text(j(7), i(25), '1') +text(j(7), i(26), '1') +text(j(7), i(27), '1') +text(j(7), i(28), '1') +text(j(7), i(29), '1') +text(j(7), i(30), '1') +text(j(7), i(31), '1') +text(j(7), i(32), '1') +text(j(7), i(33), '1') +text(j(7), i(34), '1') +text(j(7), i(35), '0') +text(j(7), i(36), '0') +text(j(7), i(37), '0') +text(j(7), i(38), '0') +text(j(7), i(39), '0') +text(j(7), i(40), '0') +text(j(7), i(41), '0') +text(j(7), i(42), '0') + +text(j(7), i(43), '0') +text(j(7), i(44), '0') +text(j(7), i(45), '0') +text(j(7), i(46), '0') +text(j(7), i(47), '0') +text(j(7), i(48), '0') +text(j(7), i(49), '0') +text(j(7), i(50), '0') +text(j(7), i(51), '-1') +text(j(7), i(52), '-1') +text(j(7), i(53), '-1') +text(j(7), i(54), '-1') +text(j(7), i(55), '-1') +text(j(7), i(56), '-1') +text(j(7), i(57), '-1') +text(j(7), i(58), '-1') +text(j(7), i(59), '-1') +text(j(7), i(60), '-1') +text(j(7), i(61), '-1') +text(j(7), i(62), '-1') +text(j(7), i(63), '-1') +text(j(7), i(64), '-1') +text(j(7), i(65), '-2') +text(j(7), i(66), '-2') +text(j(7), i(67), '-2') +text(j(7), i(68), '-2') +text(j(7), i(69), '-2') +text(j(7), i(70), '-2') +text(j(7), i(71), '-2') +text(j(7), i(72), '-2') +text(j(7), i(73), '-2') +text(j(7), i(74), '-2') +text(j(7), i(75), '-2') +text(j(7), i(76), '-2') +text(j(7), i(77), '-2') +text(j(7), i(78), '-2') +text(j(7), i(79), '-2') +text(j(7), i(80), '-2') +text(j(7), i(81), '-2') +text(j(7), i(82), '-2') +text(j(7), i(83), '-2') +text(j(7), i(84), '-2') + +% 8 +text(j(8), i(1), '2') +text(j(8), i(2), '2') +text(j(8), i(3), '2') +text(j(8), i(4), '2') +text(j(8), i(5), '2') +text(j(8), i(6), '2') +text(j(8), i(7), '2') +text(j(8), i(8), '2') +text(j(8), i(9), '2') +text(j(8), i(10), '2') +text(j(8), i(11), '2') +text(j(8), i(12), '2') +text(j(8), i(13), '2') +text(j(8), i(14), '2') +text(j(8), i(15), '2') +text(j(8), i(16), '2') +text(j(8), i(17), '2') +text(j(8), i(18), '2') +text(j(8), i(19), '1') +text(j(8), i(20), '1') +text(j(8), i(21), '1') +text(j(8), i(22), '1') +text(j(8), i(23), '1') +text(j(8), i(24), '1') +text(j(8), i(25), '1') +text(j(8), i(26), '1') +text(j(8), i(27), '1') +text(j(8), i(28), '1') +text(j(8), i(29), '1') +text(j(8), i(30), '1') +text(j(8), i(31), '1') +text(j(8), i(32), '1') +text(j(8), i(33), '1') +text(j(8), i(34), '1') +text(j(8), i(35), '0') +text(j(8), i(36), '0') +text(j(8), i(37), '0') +text(j(8), i(38), '0') +text(j(8), i(39), '0') +text(j(8), i(40), '0') +text(j(8), i(41), '0') +text(j(8), i(42), '0') + +text(j(8), i(43), '0') +text(j(8), i(44), '0') +text(j(8), i(45), '0') +text(j(8), i(46), '0') +text(j(8), i(47), '0') +text(j(8), i(48), '0') +text(j(8), i(49), '0') +text(j(8), i(50), '0') +text(j(8), i(51), '-1') +text(j(8), i(52), '-1') +text(j(8), i(53), '-1') +text(j(8), i(54), '-1') +text(j(8), i(55), '-1') +text(j(8), i(56), '-1') +text(j(8), i(57), '-1') +text(j(8), i(58), '-1') +text(j(8), i(59), '-1') +text(j(8), i(60), '-1') +text(j(8), i(61), '-1') +text(j(8), i(62), '-1') +text(j(8), i(63), '-1') +text(j(8), i(64), '-1') +text(j(8), i(65), '-1') +text(j(8), i(66), '-1') +text(j(8), i(67), '-2') +text(j(8), i(68), '-2') +text(j(8), i(69), '-2') +text(j(8), i(70), '-2') +text(j(8), i(71), '-2') +text(j(8), i(72), '-2') +text(j(8), i(73), '-2') +text(j(8), i(74), '-2') +text(j(8), i(75), '-2') +text(j(8), i(76), '-2') +text(j(8), i(77), '-2') +text(j(8), i(78), '-2') +text(j(8), i(79), '-2') +text(j(8), i(80), '-2') +text(j(8), i(81), '-2') +text(j(8), i(82), '-2') +text(j(8), i(83), '-2') +text(j(8), i(84), '-2') + +orient('landscape') +print -dpng 'pd_csa.png' + + + + + diff --git a/pipelined/srt/stine/srt4div b/pipelined/srt/stine/srt4div new file mode 100755 index 000000000..a780abd4b Binary files /dev/null and b/pipelined/srt/stine/srt4div differ diff --git a/pipelined/srt/stine/srt4div.c b/pipelined/srt/stine/srt4div.c new file mode 100755 index 000000000..65fdac53b --- /dev/null +++ b/pipelined/srt/stine/srt4div.c @@ -0,0 +1,226 @@ +#include "disp.h" +#include + +// QSLC is for division by recuerrence for +// r=4 using a CPA - See Table 5.9 EL +int qslc (double prem, double d) { + + int q; + + // For Debugging + printf("d --> %lg\n", d); + printf("rw --> %lg\n", prem); + + if ((d>=8.0)&&(d<9.0)) { + if (prem>=6.0) + q = 2; + else if (prem>=2.0) + q = 1; + else if (prem>=-2.0) + q = 0; + else if (prem >= -6) + q = -1; + else + q = -2; + return q; + } + + if ((d>=9.0)&&(d<10.0)) { + if (prem>=7) + q = 2; + else if (prem>=2.0) + q = 1; + else if (prem>=-2.0) + q = 0; + else if (prem >= 7.0) + q = -1; + else + q = -2; + return q; + } + + if ((d>=10.0)&&(d<11.0)) { + if (prem>=8.0) + q = 2; + else if (prem>=2.0) + q = 1; + else if (prem>=-2.0) + q = 0; + else if (prem >= -8.0) + q = -1; + else + q = -2; + return q; + } + + if ((d>=11.0)&&(d<12.0)) { + if (prem>=8.0) + q = 2; + else if (prem>=2.0) + q = 1; + else if (prem>=-2.0) + q = 0; + else if (prem >= -8.0) + q = -1; + else + q = -2; + return q; + } + + if ((d>=12.0)&&(d<13.0)) { + if (prem>=10.0) + q = 2; + else if (prem>=4.0) + q = 1; + else if (prem>=-4.0) + q = 0; + else if (prem >= -10.0) + q = -1; + else + q = -2; + return q; + } + + if ((d>=13.0)&&(d<14.0)) { + if (prem>=10.0) + q = 2; + else if (prem>=4.0) + q = 1; + else if (prem>=-4.0) + q = 0; + else if (prem >= -10.0) + q = -1; + else + q = -2; + return q; + } + + if ((d>=14.0)&&(d<15.0)) { + if (prem>=10.0) + q = 2; + else if (prem>=4.0) + q = 1; + else if (prem>=-4.0) + q = 0; + else if (prem >= -10.0) + q = -1; + else + q = -2; + return q; + } + + if ((d>=15.0)&&(d<16.0)) { + if (prem>=12.0) + q = 2; + else if (prem>=4.0) + q = 1; + else if (prem>=-4.0) + q = 0; + else if (prem >= -12.0) + q = -1; + else + q = -2; + return q; + } + +} + + +/* + This routine performs a radix-4 SRT division + algorithm. The user inputs the numerator, the denominator, + and the number of iterations. It assumes that 0.5 <= D < 1. + +*/ + +int main(int argc, char* argv[]) { + + double P, N, D, Q, RQ, RD, RREM, scale; + int q; + int num_iter, i; + int prec; + int radix = 4; + + if (argc < 5) { + fprintf(stderr, + "Usage: %s numerator denominator num_iterations prec\n", + argv[0]); + exit(1); + } + sscanf(argv[1],"%lg", &N); + sscanf(argv[2],"%lg", &D); + sscanf(argv[3],"%d", &num_iter); + sscanf(argv[4],"%d", &prec); + // Round to precision + N = rne(N, prec); + D = rne(D, prec); + printf("N = "); + disp_bin(N, 3, prec, stdout); + printf("\n"); + printf("D = "); + disp_bin(D, 3, prec, stdout); + printf("\n"); + + Q = 0; + P = N * pow(2.0, -log2(radix)); + printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n", + N, D, N/D, num_iter); + for (scale = 1, i = 0; i < num_iter; i++) { + // Shift by r + scale = scale * pow(2.0, -log2(radix)); + // (4*P)*8 because of footnote in Table 5.9, page 296 EL + // i.e., real value = shown value / 8 + // D*16 since we use 4 bits of D (1 bit known) + q = qslc(flr((radix * P) * 8, 3), D*16); + printf("4*W[n] = "); + disp_bin(radix*P, 3, prec, stdout); + printf("\n"); + printf("q*D = "); + disp_bin(q*D, 3, prec, stdout); + printf("\n"); + printf("W[n+1] = "); + disp_bin(P ,3, prec, stdout); + printf("\n"); + // Recurrence + P = radix * P - q * D; + // OTFC + Q = Q + q * scale; + printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P); + printf("i = %d, q = %d", i, q); + printf(", Q = "); + disp_bin(Q, 3, prec, stdout); + printf(", W = "); + disp_bin(P, 3, prec, stdout); + printf("\n\n"); + } + // Is shifted partial remainder negative? + if (P < 0) { + Q = Q - pow(2.0, -prec); + P = P + D; + printf("\nCorrecting Negative Remainder\n"); + printf("Q = %1.18lf, W = %1.18lf\n", Q, P); + printf("Q = "); + disp_bin(Q, 3, prec, stdout); + printf(", W = "); + disp_bin(P, 3, prec, stdout); + printf("\n"); + } + + // Output Results + RQ = flr(N/D, prec); + // Since q_{computed} = q / radix, multiply by radix + RD = Q * radix; + printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD); + printf("true = "); + disp_bin(RQ, 3, prec, stdout); + printf(", computed = "); + disp_bin(RD, 3, prec, stdout); + printf("\n\n"); + printf("REM = %1.18lf \n", P); + printf("REM = "); + disp_bin(P, 3, prec, stdout); + printf("\n\n"); + + return 0; + +} diff --git a/pipelined/srt/stine/test_iter128.sv b/pipelined/srt/stine/test_iter128.sv new file mode 100644 index 000000000..c2f8f5a73 --- /dev/null +++ b/pipelined/srt/stine/test_iter128.sv @@ -0,0 +1,79 @@ +`include "idiv-config.vh" + +module tb; + + logic [127:0] N, D; + logic clk; + logic reset; + logic start; + logic S; + + logic [127:0] Q; + logic [127:0] rem0; + logic div0; + logic done; + + integer handle3; + integer desc3; + integer i; + + logic [127:0] Ncomp; + logic [127:0] Dcomp; + logic [127:0] Qcomp; + logic [127:0] Rcomp; + + logic [31:0] vectornum; + logic [31:0] errors; + + intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); + + initial + begin + clk = 1'b0; + forever #5 clk = ~clk; + end + + initial + begin + vectornum = 0; + errors = 0; + handle3 = $fopen("iter128_unsigned.out"); + end + + always @(posedge clk, posedge reset) + begin + desc3 = handle3; + #0 start = 1'b0; + #0 S = 1'b0; + #0 reset = 1'b1; + #30 reset = 1'b0; + #30 N = 128'h0; + #0 D = 128'h0; + for (i=0; i<`IDIV_TESTS; i=i+1) + begin + N = {$urandom(), $urandom(), $urandom(), $urandom()}; + D = {$urandom(), $urandom(), $urandom(), $urandom()}; + start <= 1'b1; + // Wait 2 cycles (to be sure) + repeat (2) + @(posedge clk); + start <= 1'b0; + repeat (41) + @(posedge clk); + Ncomp = N; + Dcomp = D; + Qcomp = Ncomp/Dcomp; + Rcomp = Ncomp%Dcomp; + vectornum = vectornum + 1; + if ((Q !== Qcomp)) begin + errors = errors + 1; + end + $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", + N, D, Q, rem0, Qcomp, Rcomp, + (Q==Qcomp), (rem0==Rcomp)); + end // for (i=0; i<2, i=i+1) + $display("%d tests completed, %d errors", vectornum, errors); + $finish; + end + +endmodule // tb diff --git a/pipelined/srt/stine/test_iter128S.sv b/pipelined/srt/stine/test_iter128S.sv new file mode 100644 index 000000000..7757041f6 --- /dev/null +++ b/pipelined/srt/stine/test_iter128S.sv @@ -0,0 +1,90 @@ +`include "idiv-config.vh" + +module tb; + + logic [127:0] N, D; + logic clk; + logic reset; + logic start; + logic S; + + logic [127:0] Q; + logic [127:0] rem0; + logic div0; + logic done; + + integer handle3; + integer desc3; + integer i; + + logic [31:0] rnd1; + logic [31:0] rnd2; + logic [127:0] Ncomp; + logic [127:0] Dcomp; + logic [127:0] Qcomp; + logic [127:0] Rcomp; + + logic [31:0] vectornum; + logic [31:0] errors; + + intdiv #(128) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); + + initial + begin + clk = 1'b0; + forever #5 clk = ~clk; + end + + initial + begin + vectornum = 0; + errors = 0; + handle3 = $fopen("iter128_signed.out"); + end + + /* + // VCD generation for power estimation + initial + begin + $dumpfile("iter128_signed.vcd"); + $dumpvars (0,tb.dut); + end + */ + + always @(posedge clk, posedge reset) + begin + desc3 = handle3; + #0 start = 1'b0; + #0 S = 1'b1; + #0 reset = 1'b1; + #30 reset = 1'b0; + #30 N = 128'h0; + #0 D = 128'h0; + for (i=0; i<`IDIV_TESTS; i=i+1) + begin + N = {$urandom(), $urandom(), $urandom(), $urandom()}; + D = {$urandom(), $urandom(), $urandom(), $urandom()}; + start <= 1'b1; + // Wait 2 cycles (to be sure) + repeat (1) + @(posedge clk); + start <= 1'b0; + repeat (65) + @(posedge clk); + Ncomp = N; + Dcomp = D; + Qcomp = $signed(Ncomp)/$signed(Dcomp); + Rcomp = $signed(Ncomp)%$signed(Dcomp); + vectornum = vectornum + 1; + if ((Q !== Qcomp)) begin + errors = errors + 1; + end + $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", + N, D, Q, rem0, Qcomp, Rcomp, + (Q==Qcomp), (rem0==Rcomp)); + end + $display("%d tests completed, %d errors", vectornum, errors); + $finish; + end + +endmodule // tb diff --git a/pipelined/srt/stine/test_iter32.sv b/pipelined/srt/stine/test_iter32.sv new file mode 100755 index 000000000..6590b5a16 --- /dev/null +++ b/pipelined/srt/stine/test_iter32.sv @@ -0,0 +1,85 @@ +`include "idiv-config.vh" + +module tb; + + logic [31:0] N, D; + logic clk; + logic reset; + logic start; + logic S; + + logic [31:0] Q; + logic [31:0] rem0; + logic div0; + logic done; + + integer handle3; + integer desc3; + integer i; + + logic [31:0] Ncomp; + logic [31:0] Dcomp; + logic [31:0] Qcomp; + logic [31:0] Rcomp; + + logic [31:0] vectornum; + logic [31:0] errors; + + intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); + + initial + begin + clk = 1'b0; + forever #5 clk = ~clk; + end + + initial + begin + vectornum = 0; + errors = 0; + handle3 = $fopen("iter32_unsigned.out"); + end + + always @(posedge clk, posedge reset) + begin + desc3 = handle3; + #0 start = 1'b0; + #0 S = 1'b0; + #0 reset = 1'b1; + #30 reset = 1'b0; + #30 N = 32'h0; + #0 D = 32'h0; + for (i=0; i<`IDIV_TESTS; i=i+1) + begin + N = $urandom; + D = $urandom; + start <= 1'b1; + // Wait 2 cycles (to be sure) + repeat (2) + @(posedge clk); + start <= 1'b0; + repeat (41) + @(posedge clk); + Ncomp = N; + Dcomp = D; + Qcomp = Ncomp/Dcomp; + Rcomp = Ncomp%Dcomp; + if ((Q !== Qcomp)) begin + errors = errors + 1; + end + vectornum = vectornum + 1; + $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", + N, D, Q, rem0, Qcomp, Rcomp, + (Q==Qcomp), (rem0==Rcomp)); + end // for (i=0; i<2, i=i+1) + $display("%d tests completed, %d errors", vectornum, errors); + $finish; + end + +endmodule // tb + + + + + + diff --git a/pipelined/srt/stine/test_iter32S.sv b/pipelined/srt/stine/test_iter32S.sv new file mode 100644 index 000000000..e3b271b4e --- /dev/null +++ b/pipelined/srt/stine/test_iter32S.sv @@ -0,0 +1,79 @@ +`include "idiv-config.vh" + +module tb; + + logic [31:0] N, D; + logic clk; + logic reset; + logic start; + logic S; + + logic [31:0] Q; + logic [31:0] rem0; + logic div0; + logic done; + + integer handle3; + integer desc3; + integer i; + + logic [31:0] Ncomp; + logic [31:0] Dcomp; + logic [31:0] Qcomp; + logic [31:0] Rcomp; + + logic [31:0] vectornum; + logic [31:0] errors; + + intdiv #(32) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); + + initial + begin + clk = 1'b0; + forever #5 clk = ~clk; + end + + initial + begin + vectornum = 0; + errors = 0; + handle3 = $fopen("iter32_signed.out"); + end + + always @(posedge clk, posedge reset) + begin + desc3 = handle3; + #0 start = 1'b0; + #0 S = 1'b1; + #0 reset = 1'b1; + #30 reset = 1'b0; + #30 N = 32'h0; + #0 D = 32'h0; + for (i=0; i<`IDIV_TESTS; i=i+1) + begin + N = $urandom; + D = $urandom; + start <= 1'b1; + // Wait 2 cycles (to be sure) + repeat (2) + @(posedge clk); + start <= 1'b0; + repeat (41) + @(posedge clk); + Ncomp = N; + Dcomp = D; + Qcomp = $signed(Ncomp)/$signed(Dcomp); + Rcomp = $signed(Ncomp)%$signed(Dcomp); + if ((Q !== Qcomp)) begin + errors = errors + 1; + end + vectornum = vectornum + 1; + $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", + N, D, Q, rem0, Qcomp, Rcomp, + (Q==Qcomp), (rem0==Rcomp)); + end // for (i=0; i<2, i=i+1) + $display("%d tests completed, %d errors", vectornum, errors); + $finish; + end + +endmodule // tb diff --git a/pipelined/srt/stine/test_iter64.sv b/pipelined/srt/stine/test_iter64.sv new file mode 100755 index 000000000..3da85c20a --- /dev/null +++ b/pipelined/srt/stine/test_iter64.sv @@ -0,0 +1,79 @@ +`include "idiv-config.vh" + +module tb; + + logic [63:0] N, D; + logic clk; + logic reset; + logic start; + logic S; + + logic [63:0] Q; + logic [63:0] rem0; + logic div0; + logic done; + + integer handle3; + integer desc3; + integer i; + + logic [63:0] Ncomp; + logic [63:0] Dcomp; + logic [63:0] Qcomp; + logic [63:0] Rcomp; + + logic [31:0] vectornum; + logic [31:0] errors; + + intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); + + initial + begin + clk = 1'b0; + forever #5 clk = ~clk; + end + + initial + begin + vectornum = 0; + errors = 0; + handle3 = $fopen("iter64_unsigned.out"); + end + + always @(posedge clk, posedge reset) + begin + desc3 = handle3; + #0 start = 1'b0; + #0 S = 1'b0; + #0 reset = 1'b1; + #30 reset = 1'b0; + #30 N = 64'h0; + #0 D = 64'h0; + for (i=0; i<`IDIV_TESTS; i=i+1) + begin + N = {$urandom(), $urandom()}; + D = {$urandom(), $urandom()}; + start <= 1'b1; + // Wait 2 cycles (to be sure) + repeat (2) + @(posedge clk); + start <= 1'b0; + repeat (41) + @(posedge clk); + Ncomp = N; + Dcomp = D; + Qcomp = Ncomp/Dcomp; + Rcomp = Ncomp%Dcomp; + vectornum = vectornum + 1; + if ((Q !== Qcomp)) begin + errors = errors + 1; + end + $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", + N, D, Q, rem0, Qcomp, Rcomp, + (Q==Qcomp), (rem0==Rcomp)); + end // for (i=0; i<2, i=i+1) + $display("%d tests completed, %d errors", vectornum, errors); + $finish; + end + +endmodule // tb diff --git a/pipelined/srt/stine/test_iter64S.sv b/pipelined/srt/stine/test_iter64S.sv new file mode 100644 index 000000000..cdfb573a7 --- /dev/null +++ b/pipelined/srt/stine/test_iter64S.sv @@ -0,0 +1,79 @@ +`include "idiv-config.vh" + +module tb; + + logic [63:0] N, D; + logic clk; + logic reset; + logic start; + logic S; + + logic [63:0] Q; + logic [63:0] rem0; + logic div0; + logic done; + + integer handle3; + integer desc3; + integer i; + + logic [63:0] Ncomp; + logic [63:0] Dcomp; + logic [63:0] Qcomp; + logic [63:0] Rcomp; + + logic [31:0] vectornum; + logic [31:0] errors; + + intdiv #(64) dut (Q, done, rem0, div0, N, D, clk, reset, start, S); + + initial + begin + clk = 1'b0; + forever #5 clk = ~clk; + end + + initial + begin + vectornum = 0; + errors = 0; + handle3 = $fopen("iter64_signed.out"); + end + + always @(posedge clk, posedge reset) + begin + desc3 = handle3; + #0 start = 1'b0; + #0 S = 1'b1; + #0 reset = 1'b1; + #30 reset = 1'b0; + #30 N = 64'h0; + #0 D = 64'h0; + for (i=0; i<`IDIV_TESTS; i=i+1) + begin + N = {$urandom(), $urandom()}; + D = {$urandom(), $urandom()}; + start <= 1'b1; + // Wait 2 cycles (to be sure) + repeat (2) + @(posedge clk); + start <= 1'b0; + repeat (41) + @(posedge clk); + Ncomp = N; + Dcomp = D; + Qcomp = $signed(Ncomp)/$signed(Dcomp); + Rcomp = $signed(Ncomp)%$signed(Dcomp); + if ((Q !== Qcomp)) begin + errors = errors + 1; + end + vectornum = vectornum + 1; + $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b", + N, D, Q, rem0, Qcomp, Rcomp, + (Q==Qcomp), (rem0==Rcomp)); + end // for (i=0; i<2, i=i+1) + $display("%d tests completed, %d errors", vectornum, errors); + $finish; + end + +endmodule // tb diff --git a/pipelined/srt/stine/tmp b/pipelined/srt/stine/tmp new file mode 100644 index 000000000..c7cdf64f8 --- /dev/null +++ b/pipelined/srt/stine/tmp @@ -0,0 +1,1026 @@ + case({D[5:3],Wmsbs}) + 10'b000_0000000: q = 4'b0000; + 10'b000_0000001: q = 4'b0000; + 10'b000_0000010: q = 4'b0000; + 10'b000_0000011: q = 4'b0000; + 10'b000_0000100: q = 4'b0100; + 10'b000_0000101: q = 4'b0100; + 10'b000_0000110: q = 4'b0100; + 10'b000_0000111: q = 4'b0100; + 10'b000_0001000: q = 4'b0100; + 10'b000_0001001: q = 4'b0100; + 10'b000_0001010: q = 4'b0100; + 10'b000_0001011: q = 4'b0100; + 10'b000_0001100: q = 4'b1000; + 10'b000_0001101: q = 4'b1000; + 10'b000_0001110: q = 4'b1000; + 10'b000_0001111: q = 4'b1000; + 10'b000_0010000: q = 4'b1000; + 10'b000_0010001: q = 4'b1000; + 10'b000_0010010: q = 4'b1000; + 10'b000_0010011: q = 4'b1000; + 10'b000_0010100: q = 4'b1000; + 10'b000_0010101: q = 4'b1000; + 10'b000_0010110: q = 4'b1000; + 10'b000_0010111: q = 4'b1000; + 10'b000_0011000: q = 4'b1000; + 10'b000_0011001: q = 4'b1000; + 10'b000_0011010: q = 4'b1000; + 10'b000_0011011: q = 4'b1000; + 10'b000_0011100: q = 4'b1000; + 10'b000_0011101: q = 4'b1000; + 10'b000_0011110: q = 4'b1000; + 10'b000_0011111: q = 4'b1000; + 10'b000_0100000: q = 4'b1000; + 10'b000_0100001: q = 4'b1000; + 10'b000_0100010: q = 4'b1000; + 10'b000_0100011: q = 4'b1000; + 10'b000_0100100: q = 4'b1000; + 10'b000_0100101: q = 4'b1000; + 10'b000_0100110: q = 4'b1000; + 10'b000_0100111: q = 4'b1000; + 10'b000_0101000: q = 4'b1000; + 10'b000_0101001: q = 4'b1000; + 10'b000_0101010: q = 4'b1000; + 10'b000_0101011: q = 4'b1000; + 10'b000_0101100: q = 4'b1000; + 10'b000_0101101: q = 4'b1000; + 10'b000_0101110: q = 4'b1000; + 10'b000_0101111: q = 4'b1000; + 10'b000_0110000: q = 4'b1000; + 10'b000_0110001: q = 4'b1000; + 10'b000_0110010: q = 4'b1000; + 10'b000_0110011: q = 4'b1000; + 10'b000_0110100: q = 4'b1000; + 10'b000_0110101: q = 4'b1000; + 10'b000_0110110: q = 4'b1000; + 10'b000_0110111: q = 4'b1000; + 10'b000_0111000: q = 4'b1000; + 10'b000_0111001: q = 4'b1000; + 10'b000_0111010: q = 4'b1000; + 10'b000_0111011: q = 4'b1000; + 10'b000_0111100: q = 4'b1000; + 10'b000_0111101: q = 4'b1000; + 10'b000_0111110: q = 4'b1000; + 10'b000_0111111: q = 4'b1000; + 10'b000_1000000: q = 4'b0001; + 10'b000_1000001: q = 4'b0001; + 10'b000_1000010: q = 4'b0001; + 10'b000_1000011: q = 4'b0001; + 10'b000_1000100: q = 4'b0001; + 10'b000_1000101: q = 4'b0001; + 10'b000_1000110: q = 4'b0001; + 10'b000_1000111: q = 4'b0001; + 10'b000_1001000: q = 4'b0001; + 10'b000_1001001: q = 4'b0001; + 10'b000_1001010: q = 4'b0001; + 10'b000_1001011: q = 4'b0001; + 10'b000_1001100: q = 4'b0001; + 10'b000_1001101: q = 4'b0001; + 10'b000_1001110: q = 4'b0001; + 10'b000_1001111: q = 4'b0001; + 10'b000_1010000: q = 4'b0001; + 10'b000_1010001: q = 4'b0001; + 10'b000_1010010: q = 4'b0001; + 10'b000_1010011: q = 4'b0001; + 10'b000_1010100: q = 4'b0001; + 10'b000_1010101: q = 4'b0001; + 10'b000_1010110: q = 4'b0001; + 10'b000_1010111: q = 4'b0001; + 10'b000_1011000: q = 4'b0001; + 10'b000_1011001: q = 4'b0001; + 10'b000_1011010: q = 4'b0001; + 10'b000_1011011: q = 4'b0001; + 10'b000_1011100: q = 4'b0001; + 10'b000_1011101: q = 4'b0001; + 10'b000_1011110: q = 4'b0001; + 10'b000_1011111: q = 4'b0001; + 10'b000_1100000: q = 4'b0001; + 10'b000_1100001: q = 4'b0001; + 10'b000_1100010: q = 4'b0001; + 10'b000_1100011: q = 4'b0001; + 10'b000_1100100: q = 4'b0001; + 10'b000_1100101: q = 4'b0001; + 10'b000_1100110: q = 4'b0001; + 10'b000_1100111: q = 4'b0001; + 10'b000_1101000: q = 4'b0001; + 10'b000_1101001: q = 4'b0001; + 10'b000_1101010: q = 4'b0001; + 10'b000_1101011: q = 4'b0001; + 10'b000_1101100: q = 4'b0001; + 10'b000_1101101: q = 4'b0001; + 10'b000_1101110: q = 4'b0001; + 10'b000_1101111: q = 4'b0001; + 10'b000_1110000: q = 4'b0001; + 10'b000_1110001: q = 4'b0001; + 10'b000_1110010: q = 4'b0001; + 10'b000_1110011: q = 4'b0010; + 10'b000_1110100: q = 4'b0010; + 10'b000_1110101: q = 4'b0010; + 10'b000_1110110: q = 4'b0010; + 10'b000_1110111: q = 4'b0010; + 10'b000_1111000: q = 4'b0010; + 10'b000_1111001: q = 4'b0010; + 10'b000_1111010: q = 4'b0010; + 10'b000_1111011: q = 4'b0010; + 10'b000_1111100: q = 4'b0000; + 10'b000_1111101: q = 4'b0000; + 10'b000_1111110: q = 4'b0000; + 10'b000_1111111: q = 4'b0000; + 10'b001_0000000: q = 4'b0000; + 10'b001_0000001: q = 4'b0000; + 10'b001_0000010: q = 4'b0000; + 10'b001_0000011: q = 4'b0000; + 10'b001_0000100: q = 4'b0100; + 10'b001_0000101: q = 4'b0100; + 10'b001_0000110: q = 4'b0100; + 10'b001_0000111: q = 4'b0100; + 10'b001_0001000: q = 4'b0100; + 10'b001_0001001: q = 4'b0100; + 10'b001_0001010: q = 4'b0100; + 10'b001_0001011: q = 4'b0100; + 10'b001_0001100: q = 4'b0100; + 10'b001_0001101: q = 4'b0100; + 10'b001_0001110: q = 4'b1000; + 10'b001_0001111: q = 4'b1000; + 10'b001_0010000: q = 4'b1000; + 10'b001_0010001: q = 4'b1000; + 10'b001_0010010: q = 4'b1000; + 10'b001_0010011: q = 4'b1000; + 10'b001_0010100: q = 4'b1000; + 10'b001_0010101: q = 4'b1000; + 10'b001_0010110: q = 4'b1000; + 10'b001_0010111: q = 4'b1000; + 10'b001_0011000: q = 4'b1000; + 10'b001_0011001: q = 4'b1000; + 10'b001_0011010: q = 4'b1000; + 10'b001_0011011: q = 4'b1000; + 10'b001_0011100: q = 4'b1000; + 10'b001_0011101: q = 4'b1000; + 10'b001_0011110: q = 4'b1000; + 10'b001_0011111: q = 4'b1000; + 10'b001_0100000: q = 4'b1000; + 10'b001_0100001: q = 4'b1000; + 10'b001_0100010: q = 4'b1000; + 10'b001_0100011: q = 4'b1000; + 10'b001_0100100: q = 4'b1000; + 10'b001_0100101: q = 4'b1000; + 10'b001_0100110: q = 4'b1000; + 10'b001_0100111: q = 4'b1000; + 10'b001_0101000: q = 4'b1000; + 10'b001_0101001: q = 4'b1000; + 10'b001_0101010: q = 4'b1000; + 10'b001_0101011: q = 4'b1000; + 10'b001_0101100: q = 4'b1000; + 10'b001_0101101: q = 4'b1000; + 10'b001_0101110: q = 4'b1000; + 10'b001_0101111: q = 4'b1000; + 10'b001_0110000: q = 4'b1000; + 10'b001_0110001: q = 4'b1000; + 10'b001_0110010: q = 4'b1000; + 10'b001_0110011: q = 4'b1000; + 10'b001_0110100: q = 4'b1000; + 10'b001_0110101: q = 4'b1000; + 10'b001_0110110: q = 4'b1000; + 10'b001_0110111: q = 4'b1000; + 10'b001_0111000: q = 4'b1000; + 10'b001_0111001: q = 4'b1000; + 10'b001_0111010: q = 4'b1000; + 10'b001_0111011: q = 4'b1000; + 10'b001_0111100: q = 4'b1000; + 10'b001_0111101: q = 4'b1000; + 10'b001_0111110: q = 4'b1000; + 10'b001_0111111: q = 4'b1000; + 10'b001_1000000: q = 4'b0001; + 10'b001_1000001: q = 4'b0001; + 10'b001_1000010: q = 4'b0001; + 10'b001_1000011: q = 4'b0001; + 10'b001_1000100: q = 4'b0001; + 10'b001_1000101: q = 4'b0001; + 10'b001_1000110: q = 4'b0001; + 10'b001_1000111: q = 4'b0001; + 10'b001_1001000: q = 4'b0001; + 10'b001_1001001: q = 4'b0001; + 10'b001_1001010: q = 4'b0001; + 10'b001_1001011: q = 4'b0001; + 10'b001_1001100: q = 4'b0001; + 10'b001_1001101: q = 4'b0001; + 10'b001_1001110: q = 4'b0001; + 10'b001_1001111: q = 4'b0001; + 10'b001_1010000: q = 4'b0001; + 10'b001_1010001: q = 4'b0001; + 10'b001_1010010: q = 4'b0001; + 10'b001_1010011: q = 4'b0001; + 10'b001_1010100: q = 4'b0001; + 10'b001_1010101: q = 4'b0001; + 10'b001_1010110: q = 4'b0001; + 10'b001_1010111: q = 4'b0001; + 10'b001_1011000: q = 4'b0001; + 10'b001_1011001: q = 4'b0001; + 10'b001_1011010: q = 4'b0001; + 10'b001_1011011: q = 4'b0001; + 10'b001_1011100: q = 4'b0001; + 10'b001_1011101: q = 4'b0001; + 10'b001_1011110: q = 4'b0001; + 10'b001_1011111: q = 4'b0001; + 10'b001_1100000: q = 4'b0001; + 10'b001_1100001: q = 4'b0001; + 10'b001_1100010: q = 4'b0001; + 10'b001_1100011: q = 4'b0001; + 10'b001_1100100: q = 4'b0001; + 10'b001_1100101: q = 4'b0001; + 10'b001_1100110: q = 4'b0001; + 10'b001_1100111: q = 4'b0001; + 10'b001_1101000: q = 4'b0001; + 10'b001_1101001: q = 4'b0001; + 10'b001_1101010: q = 4'b0001; + 10'b001_1101011: q = 4'b0001; + 10'b001_1101100: q = 4'b0001; + 10'b001_1101101: q = 4'b0001; + 10'b001_1101110: q = 4'b0001; + 10'b001_1101111: q = 4'b0001; + 10'b001_1110000: q = 4'b0001; + 10'b001_1110001: q = 4'b0010; + 10'b001_1110010: q = 4'b0010; + 10'b001_1110011: q = 4'b0010; + 10'b001_1110100: q = 4'b0010; + 10'b001_1110101: q = 4'b0010; + 10'b001_1110110: q = 4'b0010; + 10'b001_1110111: q = 4'b0010; + 10'b001_1111000: q = 4'b0010; + 10'b001_1111001: q = 4'b0010; + 10'b001_1111010: q = 4'b0000; + 10'b001_1111011: q = 4'b0000; + 10'b001_1111100: q = 4'b0000; + 10'b001_1111101: q = 4'b0000; + 10'b001_1111110: q = 4'b0000; + 10'b001_1111111: q = 4'b0000; + 10'b010_0000000: q = 4'b0000; + 10'b010_0000001: q = 4'b0000; + 10'b010_0000010: q = 4'b0000; + 10'b010_0000011: q = 4'b0000; + 10'b010_0000100: q = 4'b0100; + 10'b010_0000101: q = 4'b0100; + 10'b010_0000110: q = 4'b0100; + 10'b010_0000111: q = 4'b0100; + 10'b010_0001000: q = 4'b0100; + 10'b010_0001001: q = 4'b0100; + 10'b010_0001010: q = 4'b0100; + 10'b010_0001011: q = 4'b0100; + 10'b010_0001100: q = 4'b0100; + 10'b010_0001101: q = 4'b0100; + 10'b010_0001110: q = 4'b0100; + 10'b010_0001111: q = 4'b1000; + 10'b010_0010000: q = 4'b1000; + 10'b010_0010001: q = 4'b1000; + 10'b010_0010010: q = 4'b1000; + 10'b010_0010011: q = 4'b1000; + 10'b010_0010100: q = 4'b1000; + 10'b010_0010101: q = 4'b1000; + 10'b010_0010110: q = 4'b1000; + 10'b010_0010111: q = 4'b1000; + 10'b010_0011000: q = 4'b1000; + 10'b010_0011001: q = 4'b1000; + 10'b010_0011010: q = 4'b1000; + 10'b010_0011011: q = 4'b1000; + 10'b010_0011100: q = 4'b1000; + 10'b010_0011101: q = 4'b1000; + 10'b010_0011110: q = 4'b1000; + 10'b010_0011111: q = 4'b1000; + 10'b010_0100000: q = 4'b1000; + 10'b010_0100001: q = 4'b1000; + 10'b010_0100010: q = 4'b1000; + 10'b010_0100011: q = 4'b1000; + 10'b010_0100100: q = 4'b1000; + 10'b010_0100101: q = 4'b1000; + 10'b010_0100110: q = 4'b1000; + 10'b010_0100111: q = 4'b1000; + 10'b010_0101000: q = 4'b1000; + 10'b010_0101001: q = 4'b1000; + 10'b010_0101010: q = 4'b1000; + 10'b010_0101011: q = 4'b1000; + 10'b010_0101100: q = 4'b1000; + 10'b010_0101101: q = 4'b1000; + 10'b010_0101110: q = 4'b1000; + 10'b010_0101111: q = 4'b1000; + 10'b010_0110000: q = 4'b1000; + 10'b010_0110001: q = 4'b1000; + 10'b010_0110010: q = 4'b1000; + 10'b010_0110011: q = 4'b1000; + 10'b010_0110100: q = 4'b1000; + 10'b010_0110101: q = 4'b1000; + 10'b010_0110110: q = 4'b1000; + 10'b010_0110111: q = 4'b1000; + 10'b010_0111000: q = 4'b1000; + 10'b010_0111001: q = 4'b1000; + 10'b010_0111010: q = 4'b1000; + 10'b010_0111011: q = 4'b1000; + 10'b010_0111100: q = 4'b1000; + 10'b010_0111101: q = 4'b1000; + 10'b010_0111110: q = 4'b1000; + 10'b010_0111111: q = 4'b1000; + 10'b010_1000000: q = 4'b0001; + 10'b010_1000001: q = 4'b0001; + 10'b010_1000010: q = 4'b0001; + 10'b010_1000011: q = 4'b0001; + 10'b010_1000100: q = 4'b0001; + 10'b010_1000101: q = 4'b0001; + 10'b010_1000110: q = 4'b0001; + 10'b010_1000111: q = 4'b0001; + 10'b010_1001000: q = 4'b0001; + 10'b010_1001001: q = 4'b0001; + 10'b010_1001010: q = 4'b0001; + 10'b010_1001011: q = 4'b0001; + 10'b010_1001100: q = 4'b0001; + 10'b010_1001101: q = 4'b0001; + 10'b010_1001110: q = 4'b0001; + 10'b010_1001111: q = 4'b0001; + 10'b010_1010000: q = 4'b0001; + 10'b010_1010001: q = 4'b0001; + 10'b010_1010010: q = 4'b0001; + 10'b010_1010011: q = 4'b0001; + 10'b010_1010100: q = 4'b0001; + 10'b010_1010101: q = 4'b0001; + 10'b010_1010110: q = 4'b0001; + 10'b010_1010111: q = 4'b0001; + 10'b010_1011000: q = 4'b0001; + 10'b010_1011001: q = 4'b0001; + 10'b010_1011010: q = 4'b0001; + 10'b010_1011011: q = 4'b0001; + 10'b010_1011100: q = 4'b0001; + 10'b010_1011101: q = 4'b0001; + 10'b010_1011110: q = 4'b0001; + 10'b010_1011111: q = 4'b0001; + 10'b010_1100000: q = 4'b0001; + 10'b010_1100001: q = 4'b0001; + 10'b010_1100010: q = 4'b0001; + 10'b010_1100011: q = 4'b0001; + 10'b010_1100100: q = 4'b0001; + 10'b010_1100101: q = 4'b0001; + 10'b010_1100110: q = 4'b0001; + 10'b010_1100111: q = 4'b0001; + 10'b010_1101000: q = 4'b0001; + 10'b010_1101001: q = 4'b0001; + 10'b010_1101010: q = 4'b0001; + 10'b010_1101011: q = 4'b0001; + 10'b010_1101100: q = 4'b0001; + 10'b010_1101101: q = 4'b0001; + 10'b010_1101110: q = 4'b0001; + 10'b010_1101111: q = 4'b0001; + 10'b010_1110000: q = 4'b0010; + 10'b010_1110001: q = 4'b0010; + 10'b010_1110010: q = 4'b0010; + 10'b010_1110011: q = 4'b0010; + 10'b010_1110100: q = 4'b0010; + 10'b010_1110101: q = 4'b0010; + 10'b010_1110110: q = 4'b0010; + 10'b010_1110111: q = 4'b0010; + 10'b010_1111000: q = 4'b0010; + 10'b010_1111001: q = 4'b0010; + 10'b010_1111010: q = 4'b0000; + 10'b010_1111011: q = 4'b0000; + 10'b010_1111100: q = 4'b0000; + 10'b010_1111101: q = 4'b0000; + 10'b010_1111110: q = 4'b0000; + 10'b010_1111111: q = 4'b0000; + 10'b011_0000000: q = 4'b0000; + 10'b011_0000001: q = 4'b0000; + 10'b011_0000010: q = 4'b0000; + 10'b011_0000011: q = 4'b0000; + 10'b011_0000100: q = 4'b0100; + 10'b011_0000101: q = 4'b0100; + 10'b011_0000110: q = 4'b0100; + 10'b011_0000111: q = 4'b0100; + 10'b011_0001000: q = 4'b0100; + 10'b011_0001001: q = 4'b0100; + 10'b011_0001010: q = 4'b0100; + 10'b011_0001011: q = 4'b0100; + 10'b011_0001100: q = 4'b0100; + 10'b011_0001101: q = 4'b0100; + 10'b011_0001110: q = 4'b0100; + 10'b011_0001111: q = 4'b0100; + 10'b011_0010000: q = 4'b1000; + 10'b011_0010001: q = 4'b1000; + 10'b011_0010010: q = 4'b1000; + 10'b011_0010011: q = 4'b1000; + 10'b011_0010100: q = 4'b1000; + 10'b011_0010101: q = 4'b1000; + 10'b011_0010110: q = 4'b1000; + 10'b011_0010111: q = 4'b1000; + 10'b011_0011000: q = 4'b1000; + 10'b011_0011001: q = 4'b1000; + 10'b011_0011010: q = 4'b1000; + 10'b011_0011011: q = 4'b1000; + 10'b011_0011100: q = 4'b1000; + 10'b011_0011101: q = 4'b1000; + 10'b011_0011110: q = 4'b1000; + 10'b011_0011111: q = 4'b1000; + 10'b011_0100000: q = 4'b1000; + 10'b011_0100001: q = 4'b1000; + 10'b011_0100010: q = 4'b1000; + 10'b011_0100011: q = 4'b1000; + 10'b011_0100100: q = 4'b1000; + 10'b011_0100101: q = 4'b1000; + 10'b011_0100110: q = 4'b1000; + 10'b011_0100111: q = 4'b1000; + 10'b011_0101000: q = 4'b1000; + 10'b011_0101001: q = 4'b1000; + 10'b011_0101010: q = 4'b1000; + 10'b011_0101011: q = 4'b1000; + 10'b011_0101100: q = 4'b1000; + 10'b011_0101101: q = 4'b1000; + 10'b011_0101110: q = 4'b1000; + 10'b011_0101111: q = 4'b1000; + 10'b011_0110000: q = 4'b1000; + 10'b011_0110001: q = 4'b1000; + 10'b011_0110010: q = 4'b1000; + 10'b011_0110011: q = 4'b1000; + 10'b011_0110100: q = 4'b1000; + 10'b011_0110101: q = 4'b1000; + 10'b011_0110110: q = 4'b1000; + 10'b011_0110111: q = 4'b1000; + 10'b011_0111000: q = 4'b1000; + 10'b011_0111001: q = 4'b1000; + 10'b011_0111010: q = 4'b1000; + 10'b011_0111011: q = 4'b1000; + 10'b011_0111100: q = 4'b1000; + 10'b011_0111101: q = 4'b1000; + 10'b011_0111110: q = 4'b1000; + 10'b011_0111111: q = 4'b1000; + 10'b011_1000000: q = 4'b0001; + 10'b011_1000001: q = 4'b0001; + 10'b011_1000010: q = 4'b0001; + 10'b011_1000011: q = 4'b0001; + 10'b011_1000100: q = 4'b0001; + 10'b011_1000101: q = 4'b0001; + 10'b011_1000110: q = 4'b0001; + 10'b011_1000111: q = 4'b0001; + 10'b011_1001000: q = 4'b0001; + 10'b011_1001001: q = 4'b0001; + 10'b011_1001010: q = 4'b0001; + 10'b011_1001011: q = 4'b0001; + 10'b011_1001100: q = 4'b0001; + 10'b011_1001101: q = 4'b0001; + 10'b011_1001110: q = 4'b0001; + 10'b011_1001111: q = 4'b0001; + 10'b011_1010000: q = 4'b0001; + 10'b011_1010001: q = 4'b0001; + 10'b011_1010010: q = 4'b0001; + 10'b011_1010011: q = 4'b0001; + 10'b011_1010100: q = 4'b0001; + 10'b011_1010101: q = 4'b0001; + 10'b011_1010110: q = 4'b0001; + 10'b011_1010111: q = 4'b0001; + 10'b011_1011000: q = 4'b0001; + 10'b011_1011001: q = 4'b0001; + 10'b011_1011010: q = 4'b0001; + 10'b011_1011011: q = 4'b0001; + 10'b011_1011100: q = 4'b0001; + 10'b011_1011101: q = 4'b0001; + 10'b011_1011110: q = 4'b0001; + 10'b011_1011111: q = 4'b0001; + 10'b011_1100000: q = 4'b0001; + 10'b011_1100001: q = 4'b0001; + 10'b011_1100010: q = 4'b0001; + 10'b011_1100011: q = 4'b0001; + 10'b011_1100100: q = 4'b0001; + 10'b011_1100101: q = 4'b0001; + 10'b011_1100110: q = 4'b0001; + 10'b011_1100111: q = 4'b0001; + 10'b011_1101000: q = 4'b0001; + 10'b011_1101001: q = 4'b0001; + 10'b011_1101010: q = 4'b0001; + 10'b011_1101011: q = 4'b0001; + 10'b011_1101100: q = 4'b0001; + 10'b011_1101101: q = 4'b0001; + 10'b011_1101110: q = 4'b0010; + 10'b011_1101111: q = 4'b0010; + 10'b011_1110000: q = 4'b0010; + 10'b011_1110001: q = 4'b0010; + 10'b011_1110010: q = 4'b0010; + 10'b011_1110011: q = 4'b0010; + 10'b011_1110100: q = 4'b0010; + 10'b011_1110101: q = 4'b0010; + 10'b011_1110110: q = 4'b0010; + 10'b011_1110111: q = 4'b0010; + 10'b011_1111000: q = 4'b0010; + 10'b011_1111001: q = 4'b0010; + 10'b011_1111010: q = 4'b0000; + 10'b011_1111011: q = 4'b0000; + 10'b011_1111100: q = 4'b0000; + 10'b011_1111101: q = 4'b0000; + 10'b011_1111110: q = 4'b0000; + 10'b011_1111111: q = 4'b0000; + 10'b100_0000000: q = 4'b0000; + 10'b100_0000001: q = 4'b0000; + 10'b100_0000010: q = 4'b0000; + 10'b100_0000011: q = 4'b0000; + 10'b100_0000100: q = 4'b0000; + 10'b100_0000101: q = 4'b0000; + 10'b100_0000110: q = 4'b0100; + 10'b100_0000111: q = 4'b0100; + 10'b100_0001000: q = 4'b0100; + 10'b100_0001001: q = 4'b0100; + 10'b100_0001010: q = 4'b0100; + 10'b100_0001011: q = 4'b0100; + 10'b100_0001100: q = 4'b0100; + 10'b100_0001101: q = 4'b0100; + 10'b100_0001110: q = 4'b0100; + 10'b100_0001111: q = 4'b0100; + 10'b100_0010000: q = 4'b0100; + 10'b100_0010001: q = 4'b0100; + 10'b100_0010010: q = 4'b1000; + 10'b100_0010011: q = 4'b1000; + 10'b100_0010100: q = 4'b1000; + 10'b100_0010101: q = 4'b1000; + 10'b100_0010110: q = 4'b1000; + 10'b100_0010111: q = 4'b1000; + 10'b100_0011000: q = 4'b1000; + 10'b100_0011001: q = 4'b1000; + 10'b100_0011010: q = 4'b1000; + 10'b100_0011011: q = 4'b1000; + 10'b100_0011100: q = 4'b1000; + 10'b100_0011101: q = 4'b1000; + 10'b100_0011110: q = 4'b1000; + 10'b100_0011111: q = 4'b1000; + 10'b100_0100000: q = 4'b1000; + 10'b100_0100001: q = 4'b1000; + 10'b100_0100010: q = 4'b1000; + 10'b100_0100011: q = 4'b1000; + 10'b100_0100100: q = 4'b1000; + 10'b100_0100101: q = 4'b1000; + 10'b100_0100110: q = 4'b1000; + 10'b100_0100111: q = 4'b1000; + 10'b100_0101000: q = 4'b1000; + 10'b100_0101001: q = 4'b1000; + 10'b100_0101010: q = 4'b1000; + 10'b100_0101011: q = 4'b1000; + 10'b100_0101100: q = 4'b1000; + 10'b100_0101101: q = 4'b1000; + 10'b100_0101110: q = 4'b1000; + 10'b100_0101111: q = 4'b1000; + 10'b100_0110000: q = 4'b1000; + 10'b100_0110001: q = 4'b1000; + 10'b100_0110010: q = 4'b1000; + 10'b100_0110011: q = 4'b1000; + 10'b100_0110100: q = 4'b1000; + 10'b100_0110101: q = 4'b1000; + 10'b100_0110110: q = 4'b1000; + 10'b100_0110111: q = 4'b1000; + 10'b100_0111000: q = 4'b1000; + 10'b100_0111001: q = 4'b1000; + 10'b100_0111010: q = 4'b1000; + 10'b100_0111011: q = 4'b1000; + 10'b100_0111100: q = 4'b1000; + 10'b100_0111101: q = 4'b1000; + 10'b100_0111110: q = 4'b1000; + 10'b100_0111111: q = 4'b1000; + 10'b100_1000000: q = 4'b0001; + 10'b100_1000001: q = 4'b0001; + 10'b100_1000010: q = 4'b0001; + 10'b100_1000011: q = 4'b0001; + 10'b100_1000100: q = 4'b0001; + 10'b100_1000101: q = 4'b0001; + 10'b100_1000110: q = 4'b0001; + 10'b100_1000111: q = 4'b0001; + 10'b100_1001000: q = 4'b0001; + 10'b100_1001001: q = 4'b0001; + 10'b100_1001010: q = 4'b0001; + 10'b100_1001011: q = 4'b0001; + 10'b100_1001100: q = 4'b0001; + 10'b100_1001101: q = 4'b0001; + 10'b100_1001110: q = 4'b0001; + 10'b100_1001111: q = 4'b0001; + 10'b100_1010000: q = 4'b0001; + 10'b100_1010001: q = 4'b0001; + 10'b100_1010010: q = 4'b0001; + 10'b100_1010011: q = 4'b0001; + 10'b100_1010100: q = 4'b0001; + 10'b100_1010101: q = 4'b0001; + 10'b100_1010110: q = 4'b0001; + 10'b100_1010111: q = 4'b0001; + 10'b100_1011000: q = 4'b0001; + 10'b100_1011001: q = 4'b0001; + 10'b100_1011010: q = 4'b0001; + 10'b100_1011011: q = 4'b0001; + 10'b100_1011100: q = 4'b0001; + 10'b100_1011101: q = 4'b0001; + 10'b100_1011110: q = 4'b0001; + 10'b100_1011111: q = 4'b0001; + 10'b100_1100000: q = 4'b0001; + 10'b100_1100001: q = 4'b0001; + 10'b100_1100010: q = 4'b0001; + 10'b100_1100011: q = 4'b0001; + 10'b100_1100100: q = 4'b0001; + 10'b100_1100101: q = 4'b0001; + 10'b100_1100110: q = 4'b0001; + 10'b100_1100111: q = 4'b0001; + 10'b100_1101000: q = 4'b0001; + 10'b100_1101001: q = 4'b0001; + 10'b100_1101010: q = 4'b0001; + 10'b100_1101011: q = 4'b0001; + 10'b100_1101100: q = 4'b0010; + 10'b100_1101101: q = 4'b0010; + 10'b100_1101110: q = 4'b0010; + 10'b100_1101111: q = 4'b0010; + 10'b100_1110000: q = 4'b0010; + 10'b100_1110001: q = 4'b0010; + 10'b100_1110010: q = 4'b0010; + 10'b100_1110011: q = 4'b0010; + 10'b100_1110100: q = 4'b0010; + 10'b100_1110101: q = 4'b0010; + 10'b100_1110110: q = 4'b0010; + 10'b100_1110111: q = 4'b0010; + 10'b100_1111000: q = 4'b0000; + 10'b100_1111001: q = 4'b0000; + 10'b100_1111010: q = 4'b0000; + 10'b100_1111011: q = 4'b0000; + 10'b100_1111100: q = 4'b0000; + 10'b100_1111101: q = 4'b0000; + 10'b100_1111110: q = 4'b0000; + 10'b100_1111111: q = 4'b0000; + 10'b101_0000000: q = 4'b0000; + 10'b101_0000001: q = 4'b0000; + 10'b101_0000010: q = 4'b0000; + 10'b101_0000011: q = 4'b0000; + 10'b101_0000100: q = 4'b0000; + 10'b101_0000101: q = 4'b0000; + 10'b101_0000110: q = 4'b0100; + 10'b101_0000111: q = 4'b0100; + 10'b101_0001000: q = 4'b0100; + 10'b101_0001001: q = 4'b0100; + 10'b101_0001010: q = 4'b0100; + 10'b101_0001011: q = 4'b0100; + 10'b101_0001100: q = 4'b0100; + 10'b101_0001101: q = 4'b0100; + 10'b101_0001110: q = 4'b0100; + 10'b101_0001111: q = 4'b0100; + 10'b101_0010000: q = 4'b0100; + 10'b101_0010001: q = 4'b0100; + 10'b101_0010010: q = 4'b0100; + 10'b101_0010011: q = 4'b0100; + 10'b101_0010100: q = 4'b1000; + 10'b101_0010101: q = 4'b1000; + 10'b101_0010110: q = 4'b1000; + 10'b101_0010111: q = 4'b1000; + 10'b101_0011000: q = 4'b1000; + 10'b101_0011001: q = 4'b1000; + 10'b101_0011010: q = 4'b1000; + 10'b101_0011011: q = 4'b1000; + 10'b101_0011100: q = 4'b1000; + 10'b101_0011101: q = 4'b1000; + 10'b101_0011110: q = 4'b1000; + 10'b101_0011111: q = 4'b1000; + 10'b101_0100000: q = 4'b1000; + 10'b101_0100001: q = 4'b1000; + 10'b101_0100010: q = 4'b1000; + 10'b101_0100011: q = 4'b1000; + 10'b101_0100100: q = 4'b1000; + 10'b101_0100101: q = 4'b1000; + 10'b101_0100110: q = 4'b1000; + 10'b101_0100111: q = 4'b1000; + 10'b101_0101000: q = 4'b1000; + 10'b101_0101001: q = 4'b1000; + 10'b101_0101010: q = 4'b1000; + 10'b101_0101011: q = 4'b1000; + 10'b101_0101100: q = 4'b1000; + 10'b101_0101101: q = 4'b1000; + 10'b101_0101110: q = 4'b1000; + 10'b101_0101111: q = 4'b1000; + 10'b101_0110000: q = 4'b1000; + 10'b101_0110001: q = 4'b1000; + 10'b101_0110010: q = 4'b1000; + 10'b101_0110011: q = 4'b1000; + 10'b101_0110100: q = 4'b1000; + 10'b101_0110101: q = 4'b1000; + 10'b101_0110110: q = 4'b1000; + 10'b101_0110111: q = 4'b1000; + 10'b101_0111000: q = 4'b1000; + 10'b101_0111001: q = 4'b1000; + 10'b101_0111010: q = 4'b1000; + 10'b101_0111011: q = 4'b1000; + 10'b101_0111100: q = 4'b1000; + 10'b101_0111101: q = 4'b1000; + 10'b101_0111110: q = 4'b1000; + 10'b101_0111111: q = 4'b1000; + 10'b101_1000000: q = 4'b0001; + 10'b101_1000001: q = 4'b0001; + 10'b101_1000010: q = 4'b0001; + 10'b101_1000011: q = 4'b0001; + 10'b101_1000100: q = 4'b0001; + 10'b101_1000101: q = 4'b0001; + 10'b101_1000110: q = 4'b0001; + 10'b101_1000111: q = 4'b0001; + 10'b101_1001000: q = 4'b0001; + 10'b101_1001001: q = 4'b0001; + 10'b101_1001010: q = 4'b0001; + 10'b101_1001011: q = 4'b0001; + 10'b101_1001100: q = 4'b0001; + 10'b101_1001101: q = 4'b0001; + 10'b101_1001110: q = 4'b0001; + 10'b101_1001111: q = 4'b0001; + 10'b101_1010000: q = 4'b0001; + 10'b101_1010001: q = 4'b0001; + 10'b101_1010010: q = 4'b0001; + 10'b101_1010011: q = 4'b0001; + 10'b101_1010100: q = 4'b0001; + 10'b101_1010101: q = 4'b0001; + 10'b101_1010110: q = 4'b0001; + 10'b101_1010111: q = 4'b0001; + 10'b101_1011000: q = 4'b0001; + 10'b101_1011001: q = 4'b0001; + 10'b101_1011010: q = 4'b0001; + 10'b101_1011011: q = 4'b0001; + 10'b101_1011100: q = 4'b0001; + 10'b101_1011101: q = 4'b0001; + 10'b101_1011110: q = 4'b0001; + 10'b101_1011111: q = 4'b0001; + 10'b101_1100000: q = 4'b0001; + 10'b101_1100001: q = 4'b0001; + 10'b101_1100010: q = 4'b0001; + 10'b101_1100011: q = 4'b0001; + 10'b101_1100100: q = 4'b0001; + 10'b101_1100101: q = 4'b0001; + 10'b101_1100110: q = 4'b0001; + 10'b101_1100111: q = 4'b0001; + 10'b101_1101000: q = 4'b0001; + 10'b101_1101001: q = 4'b0001; + 10'b101_1101010: q = 4'b0001; + 10'b101_1101011: q = 4'b0001; + 10'b101_1101100: q = 4'b0010; + 10'b101_1101101: q = 4'b0010; + 10'b101_1101110: q = 4'b0010; + 10'b101_1101111: q = 4'b0010; + 10'b101_1110000: q = 4'b0010; + 10'b101_1110001: q = 4'b0010; + 10'b101_1110010: q = 4'b0010; + 10'b101_1110011: q = 4'b0010; + 10'b101_1110100: q = 4'b0010; + 10'b101_1110101: q = 4'b0010; + 10'b101_1110110: q = 4'b0010; + 10'b101_1110111: q = 4'b0010; + 10'b101_1111000: q = 4'b0000; + 10'b101_1111001: q = 4'b0000; + 10'b101_1111010: q = 4'b0000; + 10'b101_1111011: q = 4'b0000; + 10'b101_1111100: q = 4'b0000; + 10'b101_1111101: q = 4'b0000; + 10'b101_1111110: q = 4'b0000; + 10'b101_1111111: q = 4'b0000; + 10'b110_0000000: q = 4'b0000; + 10'b110_0000001: q = 4'b0000; + 10'b110_0000010: q = 4'b0000; + 10'b110_0000011: q = 4'b0000; + 10'b110_0000100: q = 4'b0000; + 10'b110_0000101: q = 4'b0000; + 10'b110_0000110: q = 4'b0000; + 10'b110_0000111: q = 4'b0000; + 10'b110_0001000: q = 4'b0100; + 10'b110_0001001: q = 4'b0100; + 10'b110_0001010: q = 4'b0100; + 10'b110_0001011: q = 4'b0100; + 10'b110_0001100: q = 4'b0100; + 10'b110_0001101: q = 4'b0100; + 10'b110_0001110: q = 4'b0100; + 10'b110_0001111: q = 4'b0100; + 10'b110_0010000: q = 4'b0100; + 10'b110_0010001: q = 4'b0100; + 10'b110_0010010: q = 4'b0100; + 10'b110_0010011: q = 4'b0100; + 10'b110_0010100: q = 4'b1000; + 10'b110_0010101: q = 4'b1000; + 10'b110_0010110: q = 4'b1000; + 10'b110_0010111: q = 4'b1000; + 10'b110_0011000: q = 4'b1000; + 10'b110_0011001: q = 4'b1000; + 10'b110_0011010: q = 4'b1000; + 10'b110_0011011: q = 4'b1000; + 10'b110_0011100: q = 4'b1000; + 10'b110_0011101: q = 4'b1000; + 10'b110_0011110: q = 4'b1000; + 10'b110_0011111: q = 4'b1000; + 10'b110_0100000: q = 4'b1000; + 10'b110_0100001: q = 4'b1000; + 10'b110_0100010: q = 4'b1000; + 10'b110_0100011: q = 4'b1000; + 10'b110_0100100: q = 4'b1000; + 10'b110_0100101: q = 4'b1000; + 10'b110_0100110: q = 4'b1000; + 10'b110_0100111: q = 4'b1000; + 10'b110_0101000: q = 4'b1000; + 10'b110_0101001: q = 4'b1000; + 10'b110_0101010: q = 4'b1000; + 10'b110_0101011: q = 4'b1000; + 10'b110_0101100: q = 4'b1000; + 10'b110_0101101: q = 4'b1000; + 10'b110_0101110: q = 4'b1000; + 10'b110_0101111: q = 4'b1000; + 10'b110_0110000: q = 4'b1000; + 10'b110_0110001: q = 4'b1000; + 10'b110_0110010: q = 4'b1000; + 10'b110_0110011: q = 4'b1000; + 10'b110_0110100: q = 4'b1000; + 10'b110_0110101: q = 4'b1000; + 10'b110_0110110: q = 4'b1000; + 10'b110_0110111: q = 4'b1000; + 10'b110_0111000: q = 4'b1000; + 10'b110_0111001: q = 4'b1000; + 10'b110_0111010: q = 4'b1000; + 10'b110_0111011: q = 4'b1000; + 10'b110_0111100: q = 4'b1000; + 10'b110_0111101: q = 4'b1000; + 10'b110_0111110: q = 4'b1000; + 10'b110_0111111: q = 4'b1000; + 10'b110_1000000: q = 4'b0001; + 10'b110_1000001: q = 4'b0001; + 10'b110_1000010: q = 4'b0001; + 10'b110_1000011: q = 4'b0001; + 10'b110_1000100: q = 4'b0001; + 10'b110_1000101: q = 4'b0001; + 10'b110_1000110: q = 4'b0001; + 10'b110_1000111: q = 4'b0001; + 10'b110_1001000: q = 4'b0001; + 10'b110_1001001: q = 4'b0001; + 10'b110_1001010: q = 4'b0001; + 10'b110_1001011: q = 4'b0001; + 10'b110_1001100: q = 4'b0001; + 10'b110_1001101: q = 4'b0001; + 10'b110_1001110: q = 4'b0001; + 10'b110_1001111: q = 4'b0001; + 10'b110_1010000: q = 4'b0001; + 10'b110_1010001: q = 4'b0001; + 10'b110_1010010: q = 4'b0001; + 10'b110_1010011: q = 4'b0001; + 10'b110_1010100: q = 4'b0001; + 10'b110_1010101: q = 4'b0001; + 10'b110_1010110: q = 4'b0001; + 10'b110_1010111: q = 4'b0001; + 10'b110_1011000: q = 4'b0001; + 10'b110_1011001: q = 4'b0001; + 10'b110_1011010: q = 4'b0001; + 10'b110_1011011: q = 4'b0001; + 10'b110_1011100: q = 4'b0001; + 10'b110_1011101: q = 4'b0001; + 10'b110_1011110: q = 4'b0001; + 10'b110_1011111: q = 4'b0001; + 10'b110_1100000: q = 4'b0001; + 10'b110_1100001: q = 4'b0001; + 10'b110_1100010: q = 4'b0001; + 10'b110_1100011: q = 4'b0001; + 10'b110_1100100: q = 4'b0001; + 10'b110_1100101: q = 4'b0001; + 10'b110_1100110: q = 4'b0001; + 10'b110_1100111: q = 4'b0001; + 10'b110_1101000: q = 4'b0001; + 10'b110_1101001: q = 4'b0001; + 10'b110_1101010: q = 4'b0010; + 10'b110_1101011: q = 4'b0010; + 10'b110_1101100: q = 4'b0010; + 10'b110_1101101: q = 4'b0010; + 10'b110_1101110: q = 4'b0010; + 10'b110_1101111: q = 4'b0010; + 10'b110_1110000: q = 4'b0010; + 10'b110_1110001: q = 4'b0010; + 10'b110_1110010: q = 4'b0010; + 10'b110_1110011: q = 4'b0010; + 10'b110_1110100: q = 4'b0010; + 10'b110_1110101: q = 4'b0010; + 10'b110_1110110: q = 4'b0010; + 10'b110_1110111: q = 4'b0010; + 10'b110_1111000: q = 4'b0000; + 10'b110_1111001: q = 4'b0000; + 10'b110_1111010: q = 4'b0000; + 10'b110_1111011: q = 4'b0000; + 10'b110_1111100: q = 4'b0000; + 10'b110_1111101: q = 4'b0000; + 10'b110_1111110: q = 4'b0000; + 10'b110_1111111: q = 4'b0000; + 10'b111_0000000: q = 4'b0000; + 10'b111_0000001: q = 4'b0000; + 10'b111_0000010: q = 4'b0000; + 10'b111_0000011: q = 4'b0000; + 10'b111_0000100: q = 4'b0000; + 10'b111_0000101: q = 4'b0000; + 10'b111_0000110: q = 4'b0000; + 10'b111_0000111: q = 4'b0000; + 10'b111_0001000: q = 4'b0100; + 10'b111_0001001: q = 4'b0100; + 10'b111_0001010: q = 4'b0100; + 10'b111_0001011: q = 4'b0100; + 10'b111_0001100: q = 4'b0100; + 10'b111_0001101: q = 4'b0100; + 10'b111_0001110: q = 4'b0100; + 10'b111_0001111: q = 4'b0100; + 10'b111_0010000: q = 4'b0100; + 10'b111_0010001: q = 4'b0100; + 10'b111_0010010: q = 4'b0100; + 10'b111_0010011: q = 4'b0100; + 10'b111_0010100: q = 4'b0100; + 10'b111_0010101: q = 4'b0100; + 10'b111_0010110: q = 4'b0100; + 10'b111_0010111: q = 4'b0100; + 10'b111_0011000: q = 4'b1000; + 10'b111_0011001: q = 4'b1000; + 10'b111_0011010: q = 4'b1000; + 10'b111_0011011: q = 4'b1000; + 10'b111_0011100: q = 4'b1000; + 10'b111_0011101: q = 4'b1000; + 10'b111_0011110: q = 4'b1000; + 10'b111_0011111: q = 4'b1000; + 10'b111_0100000: q = 4'b1000; + 10'b111_0100001: q = 4'b1000; + 10'b111_0100010: q = 4'b1000; + 10'b111_0100011: q = 4'b1000; + 10'b111_0100100: q = 4'b1000; + 10'b111_0100101: q = 4'b1000; + 10'b111_0100110: q = 4'b1000; + 10'b111_0100111: q = 4'b1000; + 10'b111_0101000: q = 4'b1000; + 10'b111_0101001: q = 4'b1000; + 10'b111_0101010: q = 4'b1000; + 10'b111_0101011: q = 4'b1000; + 10'b111_0101100: q = 4'b1000; + 10'b111_0101101: q = 4'b1000; + 10'b111_0101110: q = 4'b1000; + 10'b111_0101111: q = 4'b1000; + 10'b111_0110000: q = 4'b1000; + 10'b111_0110001: q = 4'b1000; + 10'b111_0110010: q = 4'b1000; + 10'b111_0110011: q = 4'b1000; + 10'b111_0110100: q = 4'b1000; + 10'b111_0110101: q = 4'b1000; + 10'b111_0110110: q = 4'b1000; + 10'b111_0110111: q = 4'b1000; + 10'b111_0111000: q = 4'b1000; + 10'b111_0111001: q = 4'b1000; + 10'b111_0111010: q = 4'b1000; + 10'b111_0111011: q = 4'b1000; + 10'b111_0111100: q = 4'b1000; + 10'b111_0111101: q = 4'b1000; + 10'b111_0111110: q = 4'b1000; + 10'b111_0111111: q = 4'b1000; + 10'b111_1000000: q = 4'b0001; + 10'b111_1000001: q = 4'b0001; + 10'b111_1000010: q = 4'b0001; + 10'b111_1000011: q = 4'b0001; + 10'b111_1000100: q = 4'b0001; + 10'b111_1000101: q = 4'b0001; + 10'b111_1000110: q = 4'b0001; + 10'b111_1000111: q = 4'b0001; + 10'b111_1001000: q = 4'b0001; + 10'b111_1001001: q = 4'b0001; + 10'b111_1001010: q = 4'b0001; + 10'b111_1001011: q = 4'b0001; + 10'b111_1001100: q = 4'b0001; + 10'b111_1001101: q = 4'b0001; + 10'b111_1001110: q = 4'b0001; + 10'b111_1001111: q = 4'b0001; + 10'b111_1010000: q = 4'b0001; + 10'b111_1010001: q = 4'b0001; + 10'b111_1010010: q = 4'b0001; + 10'b111_1010011: q = 4'b0001; + 10'b111_1010100: q = 4'b0001; + 10'b111_1010101: q = 4'b0001; + 10'b111_1010110: q = 4'b0001; + 10'b111_1010111: q = 4'b0001; + 10'b111_1011000: q = 4'b0001; + 10'b111_1011001: q = 4'b0001; + 10'b111_1011010: q = 4'b0001; + 10'b111_1011011: q = 4'b0001; + 10'b111_1011100: q = 4'b0001; + 10'b111_1011101: q = 4'b0001; + 10'b111_1011110: q = 4'b0001; + 10'b111_1011111: q = 4'b0001; + 10'b111_1100000: q = 4'b0001; + 10'b111_1100001: q = 4'b0001; + 10'b111_1100010: q = 4'b0001; + 10'b111_1100011: q = 4'b0001; + 10'b111_1100100: q = 4'b0001; + 10'b111_1100101: q = 4'b0001; + 10'b111_1100110: q = 4'b0001; + 10'b111_1100111: q = 4'b0001; + 10'b111_1101000: q = 4'b0010; + 10'b111_1101001: q = 4'b0010; + 10'b111_1101010: q = 4'b0010; + 10'b111_1101011: q = 4'b0010; + 10'b111_1101100: q = 4'b0010; + 10'b111_1101101: q = 4'b0010; + 10'b111_1101110: q = 4'b0010; + 10'b111_1101111: q = 4'b0010; + 10'b111_1110000: q = 4'b0010; + 10'b111_1110001: q = 4'b0010; + 10'b111_1110010: q = 4'b0010; + 10'b111_1110011: q = 4'b0010; + 10'b111_1110100: q = 4'b0010; + 10'b111_1110101: q = 4'b0010; + 10'b111_1110110: q = 4'b0010; + 10'b111_1110111: q = 4'b0010; + 10'b111_1111000: q = 4'b0000; + 10'b111_1111001: q = 4'b0000; + 10'b111_1111010: q = 4'b0000; + 10'b111_1111011: q = 4'b0000; + 10'b111_1111100: q = 4'b0000; + 10'b111_1111101: q = 4'b0000; + 10'b111_1111110: q = 4'b0000; + 10'b111_1111111: q = 4'b0000; + endcase diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv new file mode 100644 index 000000000..985d0ad75 --- /dev/null +++ b/pipelined/srt/testbench.sv @@ -0,0 +1,219 @@ +`define DIVLEN 64 + +///////////// +// counter // +///////////// +// module counter(input logic clk, +// input logic req, +// output logic done); + +// logic [7:0] count; + +// // This block of control logic sequences the divider +// // through its iterations. You may modify it if you +// // build a divider which completes in fewer iterations. +// // You are not responsible for the (trivial) circuit +// // design of the block. + +// always @(posedge clk) +// begin +// if (count == `DIVLEN + 2) done <= #1 1; +// else if (done | req) done <= #1 0; +// if (req) count <= #1 0; +// else count <= #1 count+1; +// end +// endmodule + +/////////// +// clock // +/////////// +module clock(clk); + output clk; + + // Internal clk signal + logic clk; + +endmodule + +////////// +// testbench // +////////// +module testbench; + logic clk; + logic req; + logic done; + logic Int; + logic [63:0] a, b; + logic [51:0] afrac, bfrac; + logic [10:0] aExp, bExp; + logic asign, bsign; + logic [51:0] r; + logic [63:0] rInt; + logic [`DIVLEN-1:0] Quot; + + // Test parameters + parameter MEM_SIZE = 40000; + parameter MEM_WIDTH = 64+64+64+64; + + // INT TEST SIZES + // `define memrem 63:0 + // `define memr 127:64 + // `define memb 191:128 + // `define mema 255:192 + + // FLOAT TEST SIZES +<<<<<<< Updated upstream + `define memr 63:0 + `define memb 127:64 + `define mema 191:128 +======= + // `define memr 63:0 + // `define memb 127:64 + // `define mema 191:128 + + // SQRT TEST SIZES + `define memr 63:0 + `define mema 127:64 + `define memb 191:128 +>>>>>>> Stashed changes + + // Test logicisters + logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file + logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a + // bit field of an array + logic [63:0] correctr, nextr, diffn, diffp; + logic [10:0] rExp; + logic rsign; + integer testnum, errors; + +<<<<<<< Updated upstream + assign Int = 1'b0; +======= + // Equip Int test or Sqrt test + assign Int = 1'b0; + assign Sqrt = 1'b1; +>>>>>>> Stashed changes + + // Divider + srt srt(.clk, .Start(req), + .Stall(1'b0), .Flush(1'b0), + .XExp(aExp), .YExp(bExp), .rExp, + .XSign(asign), .YSign(bsign), .rsign, + .SrcXFrac(afrac), .SrcYFrac(bfrac), + .SrcA(a), .SrcB(b), .Fmt(2'b00), +<<<<<<< Updated upstream + .W64(1'b1), .Signed(1'b0), .Int, .Sqrt(1'b0), +======= + .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, +>>>>>>> Stashed changes + .Quot, .Rem(), .Flags(), .done); + + // Counter + // counter counter(clk, req, done); + + + initial + forever + begin + clk = 1; #17; + clk = 0; #16; + end + + + // Read test vectors from disk + initial + begin + testnum = 0; + errors = 0; +<<<<<<< Updated upstream + $readmemh ("testvectors", Tests); +======= + $readmemh ("sqrttestvectors", Tests); +>>>>>>> Stashed changes + Vec = Tests[testnum]; + a = Vec[`mema]; + {asign, aExp, afrac} = a; + b = Vec[`memb]; + {bsign, bExp, bfrac} = b; + nextr = Vec[`memr]; + r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; + rInt = Quot; + req <= #5 1; + end + + // Apply directed test vectors read from file. + + always @(posedge clk) begin + r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; + rInt = Quot; + if (done) begin + if (~Int & ~Sqrt) begin + req <= #5 1; + diffp = correctr[51:0] - r; + diffn = r - correctr[51:0]; + if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + begin + errors = errors+1; + $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp); + $display("failed\n"); + $stop; + end + if (afrac === 52'hxxxxxxxxxxxxx) + begin + $display("%d Tests completed successfully", testnum); + $stop; + end + end else if (~Sqrt) begin + req <= #5 1; + diffp = correctr[63:0] - rInt; + diffn = rInt - correctr[63:0]; + if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + begin + errors = errors+1; + $display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp); + $display("failed\n"); + $stop; + end + if (afrac === 52'hxxxxxxxxxxxxx) + begin + $display("%d Tests completed successfully", testnum); + $stop; + end + end else begin + req <= #5 1; + diffp = correctr[51:0] - r; + diffn = r - correctr[51:0]; +<<<<<<< Updated upstream + if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + begin + errors = errors + 1; + $display("result was %h, should be %h %h %h\n", rSqrt, correctr, diffn, diffp); +======= + if (rExp !== correctr[62:52]) // check if accurate to 1 ulp + begin + errors = errors + 1; + $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp); +>>>>>>> Stashed changes + $display("failed\n"); + $stop; + end + if (afrac === 52'hxxxxxxxxxxxxx) begin + $display("%d Tests completed successfully", testnum); + $stop; end + end + end + if (req) begin + req <= #5 0; + correctr = nextr; + testnum = testnum+1; + Vec = Tests[testnum]; + $display("a = %h b = %h",a,b); + a = Vec[`mema]; + {asign, aExp, afrac} = a; + b = Vec[`memb]; + {bsign, bExp, bfrac} = b; + nextr = Vec[`memr]; + end + end +endmodule + diff --git a/pipelined/srt/testgen.c b/pipelined/srt/testgen.c new file mode 100644 index 000000000..98d52819b --- /dev/null +++ b/pipelined/srt/testgen.c @@ -0,0 +1,94 @@ +/* testgen.c */ + +/* Written 10/31/96 by David Harris + + This program creates test vectors for mantissa component + of an IEEE floating point divider. + */ + +/* #includes */ + +#include +#include +#include + +/* Constants */ + +#define ENTRIES 17 +#define RANDOM_VECS 500 + +/* Prototypes */ + +void output(FILE *fptr, double a, double b, double r); +void printhex(FILE *fptr, double x); +double random_input(void); + +/* Main */ + +void main(void) +{ + FILE *fptr; + double a, b, r; + double list[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, + 1.75, 1.875, 1.99999, + 1.1, 1.2, 1.01, 1.001, 1.0001, + 1/1.1, 1/1.5, 1/1.25, 1/1.125}; + int i, j; + + if ((fptr = fopen("testvectors","w")) == NULL) { + fprintf(stderr, "Couldn't write testvectors file\n"); + exit(1); + } + + for (i=0; i2) m /= 2; + for (i=0; i<52; i+=4) { + m = m - floor(m); + m = m * 16; + val = (int)(m)%16; + fprintf(fptr, "%x", val); + } +} + +double random_input(void) +{ + return 1.0 + rand()/32767.0; +} +