From 3edf910c18e16c936458940c64f894cae0ecaf48 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Feb 2021 15:38:44 -0500 Subject: [PATCH] Started to integrate OSU divider --- wally-pipelined/src/muldiv/div/README | 1 + wally-pipelined/src/muldiv/div/README.md | 22 + wally-pipelined/src/muldiv/div/divide4x32.sv | 1302 +++++++++++ wally-pipelined/src/muldiv/div/divide4x64.sv | 1921 +++++++++++++++++ wally-pipelined/src/muldiv/div/int32div.do | 114 + wally-pipelined/src/muldiv/div/int64div.do | 114 + wally-pipelined/src/muldiv/div/iter32.do | 50 + wally-pipelined/src/muldiv/div/iter64.do | 50 + wally-pipelined/src/muldiv/div/muxs.sv | 51 + wally-pipelined/src/muldiv/div/shifters.sv | 106 + .../src/muldiv/div/test_int32div.sv | 50 + .../src/muldiv/div/test_int64div.sv | 51 + wally-pipelined/src/muldiv/div/test_iter32.sv | 74 + wally-pipelined/src/muldiv/div/test_iter64.sv | 72 + wally-pipelined/src/muldiv/muldiv.sv | 10 + 15 files changed, 3988 insertions(+) create mode 100755 wally-pipelined/src/muldiv/div/README create mode 100644 wally-pipelined/src/muldiv/div/README.md create mode 100755 wally-pipelined/src/muldiv/div/divide4x32.sv create mode 100755 wally-pipelined/src/muldiv/div/divide4x64.sv create mode 100755 wally-pipelined/src/muldiv/div/int32div.do create mode 100755 wally-pipelined/src/muldiv/div/int64div.do create mode 100755 wally-pipelined/src/muldiv/div/iter32.do create mode 100755 wally-pipelined/src/muldiv/div/iter64.do create mode 100644 wally-pipelined/src/muldiv/div/muxs.sv create mode 100644 wally-pipelined/src/muldiv/div/shifters.sv create mode 100755 wally-pipelined/src/muldiv/div/test_int32div.sv create mode 100644 wally-pipelined/src/muldiv/div/test_int64div.sv create mode 100755 wally-pipelined/src/muldiv/div/test_iter32.sv create mode 100755 wally-pipelined/src/muldiv/div/test_iter64.sv diff --git a/wally-pipelined/src/muldiv/div/README b/wally-pipelined/src/muldiv/div/README new file mode 100755 index 000000000..6898c5cec --- 
/dev/null +++ b/wally-pipelined/src/muldiv/div/README @@ -0,0 +1 @@ +vsim -do iter64.do -c diff --git a/wally-pipelined/src/muldiv/div/README.md b/wally-pipelined/src/muldiv/div/README.md new file mode 100644 index 000000000..ebb006c95 --- /dev/null +++ b/wally-pipelined/src/muldiv/div/README.md @@ -0,0 +1,22 @@ +This is a novel integer divider using r4 division by recurrence. The +reference is: + +J. E. Stine and K. Hill, "An Efficient Implementation of Radix-4 +Integer Division Using Scaling," 2020 IEEE 63rd International Midwest +Symposium on Circuits and Systems (MWSCAS), Springfield, MA, USA, +2020, pp. 1092-1095, doi: 10.1109/MWSCAS48704.2020.9184631. + +Although this version does not contain scaling, it could do this, if +needed. Moreover, a higher radix or overlapped radix can be done +easily to expand the size. Also, the implementations here are +initially unsigned but hope to expand for signed, which should be +easy. + +There are two types of tests in this directory within each testbench. 
+One tests for 32-bits and the other 64-bits: + +int32div.do and int64div.do = test individual vector for debugging + +iter32.do and iter64.do = do not use any waveform generation and just +output lots of tests + diff --git a/wally-pipelined/src/muldiv/div/divide4x32.sv b/wally-pipelined/src/muldiv/div/divide4x32.sv new file mode 100755 index 000000000..9b0ac2b4c --- /dev/null +++ b/wally-pipelined/src/muldiv/div/divide4x32.sv @@ -0,0 +1,1302 @@ +module int32div (Q, done, divdone, rem0, div0, N, D, clk, reset, start); + + input logic [31:0] N, D; + input logic clk; + input logic reset; + input logic start; + + output logic [31:0] Q; + output logic [31:0] rem0; + output logic div0; + output logic done; + output logic divdone; + + logic enable; + logic state0; + logic V; + logic [5:0] Num; + logic [4:0] P, NumIter, RemShift; + logic [31:0] op1, op2, op1shift, Rem5; + logic [32:0] Qd, Rd, Qd2, Rd2; + logic [3:0] quotient; + logic otfzero; + + // Divider goes the distance to 19 cycles + // (thanks the evil divisor for D = 0x1) + // but could theoretically be stopped when + // divdone is asserted. The enable signal + // turns off register storage thus invalidating + // any future cycles. + + // Shift D, if needed (for integer) + // needed to allow qst to be in range for integer + // division [1,2) and allow integer divide to work. + // + // The V or valid bit can be used to determine if D + // is 0 and thus a divide by 0 exception. This div0 + // exception is given to FSM to tell the operation to + // quit gracefully. 
+ + // div0 produced output errors have untested results + // (it is assumed the OS would handle some output) + + lz32 p1 (P, V, D); + shifter_l32 p2 (op2, D, P); + assign op1 = N; + assign div0 = ~V; + + // Brent-Kung adder chosen for the heck of it and + // since so small (maybe could have used a RCA) + + // #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0) + // v = 2 since \rho < 1 (add 4 to make sure it's a ceil) + bk6 cpa1 (co1, Num, {1'b0, P}, + {3'h0, shiftResult, ~shiftResult,1'b0}, 1'b0); + + // Determine whether need to add just Q/Rem + assign shiftResult = P[0]; + // div by 2 (ceil) + assign NumIter = Num[5:1]; + assign RemShift = P; + + // FSM to control integer divider + // assume inputs are positive edge and + // datapath (divider) is negative edge + fsm32 fsm1 (enablev, state0v, donev, divdonev, otfzerov, + start, div0, NumIter, ~clk, reset); + + flopr #(1) rega (~clk, reset, donev, done); + flopr #(1) regb (~clk, reset, divdonev, divdone); + flopr #(1) regc (~clk, reset, otfzerov, otfzero); + flopr #(1) regd (~clk, reset, enablev, enable); + flopr #(1) rege (~clk, reset, state0v, state0); + + // To obtain a correct remainder the last bit of the + // quotient has to be aligned with a radix-r boundary. + // Since the quotient is in the range 1/2 < q < 2 (one + // integer bit and m fractional bits), this is achieved by + // shifting N right by v+s so that (m+v+s) mod k = 0. And, + // the quotient has to be aligned to the integer position. 
+ + // Used a Brent-Kung for no reason (just wanted prefix -- might + // have gotten away with a RCA) + + // Actual divider unit FIXME: r16 (jes) + divide4x32 p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + // Storage registers to hold contents stable + flopenr #(33) reg3 (clk, reset, enable, Rd, Rd2); + flopenr #(33) reg4 (clk, reset, enable, Qd, Qd2); + + // Probably not needed - just assigns results + assign Q = Qd2[31:0]; + assign Rem5 = Rd2[32:1]; + + // Adjust remainder by m (no need to adjust by + // n ln(r) + shifter_r32 p4 (rem0, Rem5, RemShift); + +endmodule // int32div + +module divide4x32 (Q, rem0, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + input logic [31:0] op1, op2; + input logic clk, state0; + input logic reset; + input logic enable; + input logic otfzero; + input logic shiftResult; + + output logic [32:0] rem0; + output logic [32:0] Q; + output logic [3:0] quotient; + + logic [35:0] Sum, Carry; + logic [32:0] Qstar; + logic [32:0] QMstar; + logic [7:0] qtotal; + logic [35:0] SumN, CarryN, SumN2, CarryN2; + logic [35:0] divi1, divi2, divi1c, divi2c, dive1; + logic [35:0] mdivi_temp, mdivi; + logic zero; + logic [1:0] qsel; + logic [1:0] Qin, QMin; + logic CshiftQ, CshiftQM; + logic [35:0] rem1, rem2, rem3; + logic [35:0] SumR, CarryR; + logic [32:0] Qt; + + // Create one's complement values of Divisor (for q*D) + assign divi1 = {3'h0, op2, 1'b0}; + assign divi2 = {2'h0, op2, 2'b0}; + assign divi1c = ~divi1; + assign divi2c = ~divi2; + // Shift x1 if not mod k + mux2 #(36) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); + + // I I I . F F F F F ... 
(Robertson Criteria - \rho * qmax * D) + mux2 #(36) mx2 ({CarryN2[33:0], 2'h0}, 36'h0, state0, CarryN); + mux2 #(36) mx3 ({SumN2[33:0], 2'h0}, dive1, state0, SumN); + // Simplify QST + adder #(8) cpa1 (SumN[35:28], CarryN[35:28], qtotal); + // q = {+2, +1, -1, -2} else q = 0 + qst4 pd1 (qtotal[7:1], divi1[31:29], quotient); + assign ulp = quotient[2]|quotient[3]; + assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); + // Map to binary encoding + assign qsel[1] = quotient[3]|quotient[2]; + assign qsel[0] = quotient[3]|quotient[1]; + mux4 #(36) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); + mux2 #(36) mx5 (mdivi_temp, 36'h0, zero, mdivi); + csa #(36) csa1 (mdivi, SumN, {CarryN[35:1], ulp}, Sum, Carry); + // regs : save CSA + flopenr #(36) reg1 (clk, reset, enable, Sum, SumN2); + flopenr #(36) reg2 (clk, reset, enable, Carry, CarryN2); + // OTF + ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); + otf #(33) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, + otfzero, enable, Qstar, QMstar); + + // Correction and generation of Remainder + add36 cpa2 (cout1, rem1, SumN2[35:0], CarryN2[35:0], 1'b0); + // Add back +D as correction + csa #(36) csa2 (CarryN2[35:0], SumN2[35:0], divi1, SumR, CarryR); + add36 cpa3 (cout2, rem2, SumR, CarryR, 1'b0); + // Choose remainder (Rem or Rem+D) + mux2 #(36) mx6 (rem1, rem2, rem1[35], rem3); + // Choose correct Q or QM + mux2 #(33) mx7 (Qstar, QMstar, rem1[35], Qt); + // Final results + assign rem0 = rem3[32:0]; + assign Q = Qt; + +endmodule // divide4x32 + +module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); + + input logic [3:0] quot; + + output logic [1:0] Qin; + output logic [1:0] QMin; + output logic CshiftQ; + output logic CshiftQM; + + assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]); + assign Qin[0] = (quot[1]) | (quot[2]); + assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]); + assign QMin[0] = (quot[3]) | (quot[0]) | + (!quot[3]&!quot[2]&!quot[1]&!quot[0]); + assign CshiftQ = 
(quot[1]) | (quot[0]); + assign CshiftQM = (quot[3]) | (quot[2]); + + endmodule + +module otf #(parameter WIDTH=8) + (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); + + input logic [1:0] Qin, QMin; + input logic CshiftQ, CshiftQM; + input logic clk; + input logic reset; + input logic enable; + + output logic [WIDTH-1:0] R2Q; + output logic [WIDTH-1:0] R1Q; + + logic [WIDTH-1:0] Qstar, QMstar; + logic [WIDTH-1:0] M1Q, M2Q; + + // QM + mux2 #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q); + flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q); + // Q + mux2 #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q); + flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q); + + assign Qstar = R2Q; + assign QMstar = R1Q; + + endmodule // otf8 + + module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, + output logic [WIDTH-1:0] y); + assign y = a + b; + + endmodule // adder + + module fa (input logic a, b, c, output logic sum, carry); + + assign sum = a^b^c; + assign carry = a&b|a&c|b&c; + + endmodule // fa + +// Modular Carry-Save Adder + module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, + output logic [WIDTH-1:0] sum, carry); + + logic [WIDTH:0] carry_temp; + genvar i; + generate + for (i=0;i B. LT and GT are both '0' if A = B. + +module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule // magcompare2b + +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare8 (LT, EQ, A, B); + + input logic [7:0] A; + input logic [7:0] B; + + logic [3:0] s; + logic [3:0] t; + logic [1:0] u; + logic [1:0] v; + logic GT; + //wire LT; + + output logic EQ; + output logic LT; + + magcompare2b mag1 (s[0], t[0], A[1:0], B[1:0]); + magcompare2b mag2 (s[1], t[1], A[3:2], B[3:2]); + magcompare2b mag3 (s[2], t[2], A[5:4], B[5:4]); + magcompare2b mag4 (s[3], t[3], A[7:6], B[7:6]); + + magcompare2b mag5 (u[0], v[0], t[1:0], s[1:0]); + magcompare2b mag6 (u[1], v[1], t[3:2], s[3:2]); + + magcompare2b mag7 (LT, GT, v[1:0], u[1:0]); + + assign EQ = ~(GT | LT); + +endmodule // magcompare8 diff --git a/wally-pipelined/src/muldiv/div/divide4x64.sv b/wally-pipelined/src/muldiv/div/divide4x64.sv new file mode 100755 index 000000000..0cb6b0554 --- /dev/null +++ b/wally-pipelined/src/muldiv/div/divide4x64.sv @@ -0,0 +1,1921 @@ +module int64div (Q, done, divdone, rem0, div0, N, D, clk, reset, start); + + input logic [63:0] N, D; + input logic clk; + input logic reset; + input logic start; + + output logic [63:0] Q; + output logic [63:0] rem0; + output logic div0; + output logic done; + output logic divdone; + + logic enable; + logic state0; + logic V; + logic [7:0] Num; + logic [5:0] P, NumIter, RemShift; + logic [63:0] op1, op2, op1shift, Rem5; + logic [64:0] Qd, Rd, Qd2, Rd2; + logic [3:0] quotient; + logic otfzero; + logic shiftResult; + + // Divider goes the distance to 37 cycles + // (thanks the evil divisor for D = 0x1) + // but could theoretically be stopped when + // divdone is asserted. The enable signal + // turns off register storage thus invalidating + // any future cycles. + + // Shift D, if needed (for integer) + // needed to allow qst to be in range for integer + // division [1,2) and allow integer divide to work. + // + // The V or valid bit can be used to determine if D + // is 0 and thus a divide by 0 exception. 
This div0 + // exception is given to FSM to tell the operation to + // quit gracefully. + + // div0 produced output errors have untested results + // (it is assumed the OS would handle some output) + + lz64 p1 (P, V, D); + shifter_l64 p2 (op2, D, P); + assign op1 = N; + assign div0 = ~V; + + // Brent-Kung adder chosen for the heck of it and + // since so small (maybe could have used a RCA) + + // #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0) + // v = 2 since \rho < 1 (add 4 to make sure it's a ceil) + bk8 cpa1 (co1, Num, {2'b0, P}, + {5'h0, shiftResult, ~shiftResult, 1'b0}, 1'b0); + + // Determine whether need to add just Q/Rem + assign shiftResult = P[0]; + // div by 2 (ceil) + assign NumIter = Num[6:1]; + assign RemShift = P; + + // FSM to control integer divider + // assume inputs are positive edge and + // datapath (divider) is negative edge + fsm64 fsm1 (enablev, state0v, donev, divdonev, otfzerov, + start, div0, NumIter, ~clk, reset); + + flopr #(1) rega (~clk, reset, donev, done); + flopr #(1) regb (~clk, reset, divdonev, divdone); + flopr #(1) regc (~clk, reset, otfzerov, otfzero); + flopr #(1) regd (~clk, reset, enablev, enable); + flopr #(1) rege (~clk, reset, state0v, state0); + + // To obtain a correct remainder the last bit of the + // quotient has to be aligned with a radix-r boundary. + // Since the quotient is in the range 1/2 < q < 2 (one + // integer bit and m fractional bits), this is achieved by + // shifting N right by v+s so that (m+v+s) mod k = 0. And, + // the quotient has to be aligned to the integer position. 
+ + // Used a Brent-Kung for no reason (just wanted prefix -- might + // have gotten away with a RCA) + + // Actual divider unit FIXME: r16 (jes) + divide4x64 p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + // Storage registers to hold contents stable + flopenr #(65) reg3 (clk, reset, enable, Rd, Rd2); + flopenr #(65) reg4 (clk, reset, enable, Qd, Qd2); + + // Probably not needed - just assigns results + assign Q = Qd2[63:0]; + assign Rem5 = Rd2[64:1]; + + // Adjust remainder by m (no need to adjust by + // n ln(r) + shifter_r64 p4 (rem0, Rem5, RemShift); + +endmodule // int64div + +module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, + enable, otfzero, shiftResult); + + input logic [63:0] op1, op2; + input logic clk, state0; + input logic reset; + input logic enable; + input logic otfzero; + input logic shiftResult; + + output logic [64:0] rem0; + output logic [64:0] Q; + output logic [3:0] quotient; + + logic [67:0] Sum, Carry; + logic [64:0] Qstar; + logic [64:0] QMstar; + logic [7:0] qtotal; + logic [67:0] SumN, CarryN, SumN2, CarryN2; + logic [67:0] divi1, divi2, divi1c, divi2c, dive1; + logic [67:0] mdivi_temp, mdivi; + logic zero; + logic [1:0] qsel; + logic [1:0] Qin, QMin; + logic CshiftQ, CshiftQM; + logic [67:0] rem1, rem2, rem3; + logic [67:0] SumR, CarryR; + logic [64:0] Qt; + + // Create one's complement values of Divisor (for q*D) + assign divi1 = {3'h0, op2, 1'b0}; + assign divi2 = {2'h0, op2, 2'b0}; + assign divi1c = ~divi1; + assign divi2c = ~divi2; + // Shift x1 if not mod k + mux2 #(68) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); + + // I I I . F F F F F ... 
(Robertson Criteria - \rho * qmax * D) + mux2 #(68) mx2 ({CarryN2[65:0], 2'h0}, 68'h0, state0, CarryN); + mux2 #(68) mx3 ({SumN2[65:0], 2'h0}, dive1, state0, SumN); + // Simplify QST + adder #(8) cpa1 (SumN[67:60], CarryN[67:60], qtotal); + // q = {+2, +1, -1, -2} else q = 0 + qst4 pd1 (qtotal[7:1], divi1[63:61], quotient); + assign ulp = quotient[2]|quotient[3]; + assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); + // Map to binary encoding + assign qsel[1] = quotient[3]|quotient[2]; + assign qsel[0] = quotient[3]|quotient[1]; + mux4 #(68) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); + mux2 #(68) mx5 (mdivi_temp, 68'h0, zero, mdivi); + csa #(68) csa1 (mdivi, SumN, {CarryN[67:1], ulp}, Sum, Carry); + // regs : save CSA + flopenr #(68) reg1 (clk, reset, enable, Sum, SumN2); + flopenr #(68) reg2 (clk, reset, enable, Carry, CarryN2); + // OTF + ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); + otf #(65) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, + otfzero, enable, Qstar, QMstar); + + // Correction and generation of Remainder + add68 cpa2 (cout1, rem1, SumN2[67:0], CarryN2[67:0], 1'b0); + // Add back +D as correction + csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR); + add68 cpa3 (cout2, rem2, SumR, CarryR, 1'b0); + // Choose remainder (Rem or Rem+D) + mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3); + // Choose correct Q or QM + mux2 #(65) mx7 (Qstar, QMstar, rem1[67], Qt); + // Final results + assign rem0 = rem3[64:0]; + assign Q = Qt; + +endmodule // divide4x64 + +module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); + + input logic [3:0] quot; + + output logic [1:0] Qin; + output logic [1:0] QMin; + output logic CshiftQ; + output logic CshiftQM; + + assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]); + assign Qin[0] = (quot[1]) | (quot[2]); + assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]); + assign QMin[0] = (quot[3]) | (quot[0]) | + (!quot[3]&!quot[2]&!quot[1]&!quot[0]); + assign CshiftQ = 
(quot[1]) | (quot[0]); + assign CshiftQM = (quot[3]) | (quot[2]); + + endmodule + +module otf #(parameter WIDTH=8) + (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); + + input logic [1:0] Qin, QMin; + input logic CshiftQ, CshiftQM; + input logic clk; + input logic reset; + input logic enable; + + output logic [WIDTH-1:0] R2Q; + output logic [WIDTH-1:0] R1Q; + + logic [WIDTH-1:0] Qstar, QMstar; + logic [WIDTH-1:0] M1Q, M2Q; + + // QM + mux2 #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q); + flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q); + // Q + mux2 #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q); + flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q); + + assign Qstar = R2Q; + assign QMstar = R1Q; + + endmodule // otf8 + + module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, + output logic [WIDTH-1:0] y); + + assign y = a + b; + + endmodule // adder + + module fa (input logic a, b, c, output logic sum, carry); + + assign sum = a^b^c; + assign carry = a&b|a&c|b&c; + + endmodule // fa + + module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, + output logic [WIDTH-1:0] sum, carry); + + logic [WIDTH:0] carry_temp; + genvar i; + generate + for (i=0;i B. LT and GT are both '0' if A = B. + +module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule // magcompare2b + +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare8 (LT, EQ, A, B); + + input logic [7:0] A; + input logic [7:0] B; + + logic [3:0] s; + logic [3:0] t; + logic [1:0] u; + logic [1:0] v; + logic GT; + //wire LT; + + output logic EQ; + output logic LT; + + magcompare2b mag1 (s[0], t[0], A[1:0], B[1:0]); + magcompare2b mag2 (s[1], t[1], A[3:2], B[3:2]); + magcompare2b mag3 (s[2], t[2], A[5:4], B[5:4]); + magcompare2b mag4 (s[3], t[3], A[7:6], B[7:6]); + + magcompare2b mag5 (u[0], v[0], t[1:0], s[1:0]); + magcompare2b mag6 (u[1], v[1], t[3:2], s[3:2]); + + magcompare2b mag7 (LT, GT, v[1:0], u[1:0]); + + assign EQ = ~(GT | LT); + +endmodule // magcompare8 diff --git a/wally-pipelined/src/muldiv/div/int32div.do b/wally-pipelined/src/muldiv/div/int32div.do new file mode 100755 index 000000000..bb327fbc6 --- /dev/null +++ b/wally-pipelined/src/muldiv/div/int32div.do @@ -0,0 +1,114 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog muxs.sv shifters.sv divide4x32.sv test_int32div.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + +view list +view wave + +-- display input and output signals as hexidecimal values +# Diplays All Signals recursively +add wave -noupdate -divider -height 32 "Control Signals" +add wave -hex -color gold /tb/clk +add wave -hex -color #0080ff /tb/reset +add wave -hex -color #0080ff /tb/start +add wave -hex -color #0080ff /tb/done +add wave -hex -color #0080ff /tb/divdone +add wave -noupdate -divider -height 32 "Key Parts" +add wave -unsigned /tb/dut/NumIter +add wave -unsigned /tb/dut/RemShift +add wave -unsigned /tb/dut/Qd2 +add wave -unsigned /tb/dut/Rd2 +add wave -unsigned /tb/dut/rem0 +add wave -unsigned /tb/dut/Q +add wave -unsigned /tb/dut/P +add wave -unsigned /tb/dut/shiftResult +add wave -noupdate -divider -height 32 "FSM" +add wave -hex /tb/dut/fsm1/CURRENT_STATE +add wave -hex /tb/dut/fsm1/NEXT_STATE +add wave -hex -color #0080ff /tb/dut/fsm1/start +add wave -hex -color #0080ff /tb/dut/fsm1/state0 +add wave -hex -color #0080ff /tb/dut/fsm1/done +add wave -hex -color #0080ff /tb/dut/fsm1/en +add wave -hex -color #0080ff /tb/dut/fsm1/divdone +add wave -hex -color #0080ff /tb/dut/fsm1/reset +add wave -hex -color #0080ff /tb/dut/fsm1/otfzero +add wave -hex -color #0080ff /tb/dut/fsm1/LT +add wave -hex -color #0080ff /tb/dut/fsm1/EQ +add wave -hex -color gold /tb/dut/fsm1/clk +add wave -noupdate -divider -height 32 "Datapath" +add wave -hex /tb/dut/N +add wave -hex /tb/dut/D +add wave -hex /tb/dut/reset +add wave -hex /tb/dut/start +add wave -hex /tb/dut/Q +add wave -hex /tb/dut/rem0 +add 
wave -hex /tb/dut/div0 +add wave -hex /tb/dut/done +add wave -hex /tb/dut/divdone +add wave -hex /tb/dut/enable +add wave -hex /tb/dut/state0 +add wave -hex /tb/dut/V +add wave -hex /tb/dut/Num +add wave -hex /tb/dut/P +add wave -hex /tb/dut/NumIter +add wave -hex /tb/dut/RemShift +add wave -hex /tb/dut/op1 +add wave -hex /tb/dut/op2 +add wave -hex /tb/dut/op1shift +add wave -hex /tb/dut/Rem5 +add wave -hex /tb/dut/Qd +add wave -hex /tb/dut/Rd +add wave -hex /tb/dut/Qd2 +add wave -hex /tb/dut/Rd2 +add wave -hex /tb/dut/quotient +add wave -hex /tb/dut/otfzero +add wave -noupdate -divider -height 32 "Divider" +add wave -hex -r /tb/dut/p3/* + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 138ns + + diff --git a/wally-pipelined/src/muldiv/div/int64div.do b/wally-pipelined/src/muldiv/div/int64div.do new file mode 100755 index 000000000..0516f2108 --- /dev/null +++ b/wally-pipelined/src/muldiv/div/int64div.do @@ -0,0 +1,114 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog muxs.sv shifters.sv divide4x64.sv test_int64div.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + +view list +view wave + +-- display input and output signals as hexidecimal values +# Diplays All Signals recursively +add wave -noupdate -divider -height 32 "Control Signals" +add wave -hex -color gold /tb/clk +add wave -hex -color #0080ff /tb/reset +add wave -hex -color #0080ff /tb/start +add wave -hex -color #0080ff /tb/done +add wave -hex -color #0080ff /tb/divdone +add wave -noupdate -divider -height 32 "Key Parts" +add wave -unsigned /tb/dut/NumIter +add wave -unsigned /tb/dut/RemShift +add wave -unsigned /tb/dut/Qd2 +add wave -unsigned /tb/dut/Rd2 +add wave -unsigned /tb/dut/rem0 +add wave -unsigned /tb/dut/Q +add wave -unsigned /tb/dut/P +add wave -unsigned /tb/dut/shiftResult +add wave -noupdate -divider -height 32 "FSM" +add wave -hex /tb/dut/fsm1/CURRENT_STATE +add wave -hex /tb/dut/fsm1/NEXT_STATE +add wave -hex -color #0080ff /tb/dut/fsm1/start +add wave -hex -color #0080ff /tb/dut/fsm1/state0 +add wave -hex -color #0080ff /tb/dut/fsm1/done +add wave -hex -color #0080ff /tb/dut/fsm1/en +add wave -hex -color #0080ff /tb/dut/fsm1/divdone +add wave -hex -color #0080ff /tb/dut/fsm1/reset +add wave -hex -color #0080ff /tb/dut/fsm1/otfzero +add wave -hex -color #0080ff /tb/dut/fsm1/LT +add wave -hex -color #0080ff /tb/dut/fsm1/EQ +add wave -hex -color gold /tb/dut/fsm1/clk +add wave -noupdate -divider -height 32 "Datapath" +add wave -hex /tb/dut/N +add wave -hex /tb/dut/D +add wave -hex /tb/dut/reset +add wave -hex /tb/dut/start +add wave -hex /tb/dut/Q +add wave -hex /tb/dut/rem0 +add 
wave -hex /tb/dut/div0 +add wave -hex /tb/dut/done +add wave -hex /tb/dut/divdone +add wave -hex /tb/dut/enable +add wave -hex /tb/dut/state0 +add wave -hex /tb/dut/V +add wave -hex /tb/dut/Num +add wave -hex /tb/dut/P +add wave -hex /tb/dut/NumIter +add wave -hex /tb/dut/RemShift +add wave -hex /tb/dut/op1 +add wave -hex /tb/dut/op2 +add wave -hex /tb/dut/op1shift +add wave -hex /tb/dut/Rem5 +add wave -hex /tb/dut/Qd +add wave -hex /tb/dut/Rd +add wave -hex /tb/dut/Qd2 +add wave -hex /tb/dut/Rd2 +add wave -hex /tb/dut/quotient +add wave -hex /tb/dut/otfzero +add wave -noupdate -divider -height 32 "Divider" +add wave -hex -r /tb/dut/p3/* + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 138ns + + diff --git a/wally-pipelined/src/muldiv/div/iter32.do b/wally-pipelined/src/muldiv/div/iter32.do new file mode 100755 index 000000000..0472bd7db --- /dev/null +++ b/wally-pipelined/src/muldiv/div/iter32.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do run.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do run.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +vlog muxs.sv shifters.sv divide4x32.sv test_iter32.sv + +# start and run simulation +vsim -voptargs=+acc work.tb + + +-- Set Wave Output Items +TreeUpdate [SetDefaultTree] +WaveRestoreZoom {0 ps} {75 ns} +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 0 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 + +-- Run the Simulation +run 9586700ns +quit diff --git a/wally-pipelined/src/muldiv/div/iter64.do b/wally-pipelined/src/muldiv/div/iter64.do new file mode 100755 index 000000000..0154d5f7c --- /dev/null +++ b/wally-pipelined/src/muldiv/div/iter64.do @@ -0,0 +1,50 @@ +# Copyright 1991-2007 Mentor Graphics Corporation +# +# Modification by Oklahoma State University +# Use with Testbench +# James Stine, 2008 +# Go Cowboys!!!!!! +# +# All Rights Reserved. +# +# THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION +# WHICH IS THE PROPERTY OF MENTOR GRAPHICS CORPORATION +# OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS. + +# Use this run.do file to run this example. 
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do iter64.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do iter64.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+onbreak {resume}
+
+# create library
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+vlog muxs.sv shifters.sv divide4x64.sv test_iter64.sv
+
+# start and run simulation
+vsim -voptargs=+acc work.tb
+
+
+# Set Wave Output Items
+TreeUpdate [SetDefaultTree]
+WaveRestoreZoom {0 ps} {75 ns}
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+
+# Run the Simulation
+run 9586700ns
+quit
diff --git a/wally-pipelined/src/muldiv/div/muxs.sv b/wally-pipelined/src/muldiv/div/muxs.sv
new file mode 100644
index 000000000..d13045e6d
--- /dev/null
+++ b/wally-pipelined/src/muldiv/div/muxs.sv
@@ -0,0 +1,57 @@
+// mux2: WIDTH-bit 2:1 multiplexer; y = d1 when s is 1, otherwise d0.
+module mux2 #(parameter WIDTH = 8)
+   (input  logic [WIDTH-1:0] d0, d1,
+    input  logic             s,
+    output logic [WIDTH-1:0] y);
+
+   assign y = s ? d1 : d0;
+
+endmodule // mux2
+
+// mux3: WIDTH-bit 3:1 multiplexer.  s[1] has priority, so s = 2'b10 and
+// s = 2'b11 both select d2; with s[1] low, s[0] chooses d1 over d0.
+module mux3 #(parameter WIDTH = 8)
+   (input  logic [WIDTH-1:0] d0, d1, d2,
+    input  logic [1:0]       s,
+    output logic [WIDTH-1:0] y);
+
+   assign y = s[1] ? d2 : (s[0] ? d1 : d0);
+
+endmodule // mux3
+
+// mux4: WIDTH-bit 4:1 multiplexer; s = 0, 1, 2, 3 selects d0, d1, d2, d3.
+module mux4 #(parameter WIDTH = 8)
+   (input  logic [WIDTH-1:0] d0, d1, d2, d3,
+    input  logic [1:0]       s,
+    output logic [WIDTH-1:0] y);
+
+   assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
+
+endmodule // mux4
+
+// mux21x32: fixed 32-bit 2:1 multiplexer; Z = B when Sel is 1, otherwise A.
+module mux21x32 (Z, A, B, Sel);
+
+   input logic [31:0]  A;
+   input logic [31:0]  B;
+   input logic         Sel;
+
+   output logic [31:0] Z;
+
+   assign Z = Sel ? B : A;
+
+endmodule // mux21x32
+
+// mux21x64: fixed 64-bit 2:1 multiplexer; Z = B when Sel is 1, otherwise A.
+module mux21x64 (Z, A, B, Sel);
+
+   input logic [63:0]  A;
+   input logic [63:0]  B;
+   input logic         Sel;
+
+   output logic [63:0] Z;
+
+   assign Z = Sel ? B : A;
+
+endmodule // mux21x64
+
diff --git a/wally-pipelined/src/muldiv/div/shifters.sv b/wally-pipelined/src/muldiv/div/shifters.sv
new file mode 100644
index 000000000..85c4e5b68
--- /dev/null
+++ b/wally-pipelined/src/muldiv/div/shifters.sv
@@ -0,0 +1,118 @@
+// shifter_l64: 64-bit left shift of A by Shift (0 to 63), zero-filling from
+// the right.  Six mux stages conditionally shift by 32, 16, 8, 4, 2 and 1.
+module shifter_l64 (Z, A, Shift);
+
+   input logic [63:0]  A;
+   input logic [5:0]   Shift;
+
+   logic [63:0]        stage1;
+   logic [63:0]        stage2;
+   logic [63:0]        stage3;
+   logic [63:0]        stage4;
+   logic [63:0]        stage5;
+   // zero-fill constants; localparam so they are true constants for synthesis
+   localparam logic [31:0] thirtytwozeros = 32'h0;
+   localparam logic [15:0] sixteenzeros = 16'h0;
+   localparam logic [ 7:0] eightzeros = 8'h0;
+   localparam logic [ 3:0] fourzeros = 4'h0;
+   localparam logic [ 1:0] twozeros = 2'b00;
+   localparam logic        onezero = 1'b0;
+
+   output logic [63:0] Z;
+
+   mux21x64 mx01(stage1, A,      {A[31:0], thirtytwozeros},    Shift[5]);
+   mux21x64 mx02(stage2, stage1, {stage1[47:0], sixteenzeros}, Shift[4]);
+   mux21x64 mx03(stage3, stage2, {stage2[55:0], eightzeros},   Shift[3]);
+   mux21x64 mx04(stage4, stage3, {stage3[59:0], fourzeros},    Shift[2]);
+   mux21x64 mx05(stage5, stage4, {stage4[61:0], twozeros},     Shift[1]);
+   mux21x64 mx06(Z,      stage5, {stage5[62:0], onezero},      Shift[0]);
+
+endmodule // shifter_l64
+
+// shifter_r64: 64-bit right shift of A by Shift (0 to 63), zero-filling from
+// the left.  Six mux stages conditionally shift by 32, 16, 8, 4, 2 and 1.
+module shifter_r64 (Z, A, Shift);
+
+   input logic [63:0]  A;
+   input logic [5:0]   Shift;
+
+   logic [63:0]        stage1;
+   logic [63:0]        stage2;
+   logic [63:0]        stage3;
+   logic [63:0]        stage4;
+   logic [63:0]        stage5;
+   // zero-fill constants; localparam so they are true constants for synthesis
+   localparam logic [31:0] thirtytwozeros = 32'h0;
+   localparam logic [15:0] sixteenzeros = 16'h0;
+   localparam logic [ 7:0] eightzeros = 8'h0;
+   localparam logic [ 3:0] fourzeros = 4'h0;
+   localparam logic [ 1:0] twozeros = 2'b00;
+   localparam logic        onezero = 1'b0;
+
+   output logic [63:0] Z;
+
+   mux21x64 mx01(stage1, A,      {thirtytwozeros, A[63:32]},    Shift[5]);
+   mux21x64 mx02(stage2, stage1, {sixteenzeros, stage1[63:16]}, Shift[4]);
+   mux21x64 mx03(stage3, stage2, {eightzeros, stage2[63:8]},    Shift[3]);
+   mux21x64 mx04(stage4, stage3, {fourzeros, stage3[63:4]},     Shift[2]);
+   mux21x64 mx05(stage5, stage4, {twozeros, stage4[63:2]},      Shift[1]);
+   mux21x64 mx06(Z,      stage5, {onezero, stage5[63:1]},       Shift[0]);
+
+endmodule // shifter_r64
+
+// shifter_l32: 32-bit left shift of A by Shift (0 to 31), zero-filling from
+// the right.  Five mux stages conditionally shift by 16, 8, 4, 2 and 1.
+module shifter_l32 (Z, A, Shift);
+
+   input logic [31:0]  A;
+   input logic [4:0]   Shift;
+
+   logic [31:0]        stage1;
+   logic [31:0]        stage2;
+   logic [31:0]        stage3;
+   logic [31:0]        stage4;
+   // zero-fill constants; localparam so they are true constants for synthesis
+   localparam logic [15:0] sixteenzeros = 16'h0;
+   localparam logic [ 7:0] eightzeros = 8'h0;
+   localparam logic [ 3:0] fourzeros = 4'h0;
+   localparam logic [ 1:0] twozeros = 2'b00;
+   localparam logic        onezero = 1'b0;
+
+   output logic [31:0] Z;
+
+   mux21x32 mx01(stage1, A,      {A[15:0], sixteenzeros},    Shift[4]);
+   mux21x32 mx02(stage2, stage1, {stage1[23:0], eightzeros}, Shift[3]);
+   mux21x32 mx03(stage3, stage2, {stage2[27:0], fourzeros},  Shift[2]);
+   mux21x32 mx04(stage4, stage3, {stage3[29:0], twozeros},   Shift[1]);
+   mux21x32 mx05(Z,      stage4, {stage4[30:0], onezero},    Shift[0]);
+
+endmodule // shifter_l32
+
+// shifter_r32: 32-bit right shift of A by Shift (0 to 31), zero-filling from
+// the left.  Five mux stages conditionally shift by 16, 8, 4, 2 and 1.
+module shifter_r32 (Z, A, Shift);
+
+   input logic [31:0]  A;
+   input logic [4:0]   Shift;
+
+   logic [31:0]        stage1;
+   logic [31:0]        stage2;
+   logic [31:0]        stage3;
+   logic [31:0]        stage4;
+   // zero-fill constants; localparam so they are true constants for synthesis
+   localparam logic [15:0] sixteenzeros = 16'h0;
+   localparam logic [ 7:0] eightzeros = 8'h0;
+   localparam logic [ 3:0] fourzeros = 4'h0;
+   localparam logic [ 1:0] twozeros = 2'b00;
+   localparam logic        onezero = 1'b0;
+
+   output logic [31:0] Z;
+
+   mux21x32 mx01(stage1, A,      {sixteenzeros, A[31:16]},   Shift[4]);
+   mux21x32 mx02(stage2, stage1, {eightzeros, stage1[31:8]}, Shift[3]);
+   mux21x32 mx03(stage3, stage2, {fourzeros, stage2[31:4]},  Shift[2]);
+   mux21x32 mx04(stage4, stage3, {twozeros, stage3[31:2]},   Shift[1]);
+   mux21x32 mx05(Z,      stage4, {onezero, stage4[31:1]},    Shift[0]);
+
+endmodule // shifter_r32
+
diff --git a/wally-pipelined/src/muldiv/div/test_int32div.sv b/wally-pipelined/src/muldiv/div/test_int32div.sv
new file mode 100755
index 000000000..c9260ecc8
--- /dev/null
+++ b/wally-pipelined/src/muldiv/div/test_int32div.sv
@@ -0,0 +1,49 @@
+// Directed smoke test for the 32-bit divider: pulses reset, applies a single
+// N/D pair with start held high for 50 time units, and stops at time 800.
+// Nothing is checked automatically; results have to be inspected by hand.
+module tb;
+
+   logic [31:0] N, D;
+   logic        clk;
+   logic        reset;
+   logic        start;
+
+   logic [31:0] Q;
+   logic [31:0] rem;
+   logic        div0;
+   logic        done;
+   logic        divdone;
+
+   int32div dut (Q, done, divdone, rem, div0, N, D, clk, reset, start);
+
+   // free-running clock, period 10 time units
+   initial
+     begin
+        clk = 1'b0;
+        forever #5 clk = ~clk;
+     end
+
+   // hard stop for the simulation
+   initial
+     begin
+        #800 $finish;
+     end
+
+   // stimulus: reset for 22 time units, then launch one division
+   initial
+     begin
+        #0  N = 32'h0;
+        #0  D = 32'h0;
+        #0  start = 1'b0;
+        #0  reset = 1'b1;
+        #22 reset = 1'b0;
+        //#25 N = 32'h9830_07C0;
+        //#0 D = 32'h0000_000C;
+        #25 N = 32'h06b9_7b0d;
+        #0  D = 32'h46df_998d;
+        #0  start = 1'b1;
+        #50 start = 1'b0;
+
+     end
+
+endmodule // tb
diff --git a/wally-pipelined/src/muldiv/div/test_int64div.sv b/wally-pipelined/src/muldiv/div/test_int64div.sv
new file mode 100644
index 000000000..ad415f0ff
--- /dev/null
+++ b/wally-pipelined/src/muldiv/div/test_int64div.sv
@@ -0,0 +1,49 @@
+// Directed smoke test for the 64-bit divider: pulses reset, applies a single
+// N/D pair with start held high for 50 time units, and stops at time 800.
+// Nothing is checked automatically; results have to be inspected by hand.
+module tb;
+
+   logic [63:0] N, D;
+   logic        clk;
+   logic        reset;
+   logic        start;
+
+   logic [63:0] Q;
+   logic [63:0] rem;
+   logic        div0;
+   logic        done;
+   logic        divdone;
+
+   int64div dut (Q, done, divdone, rem, div0, N, D, clk, reset, start);
+
+   // free-running clock, period 10 time units
+   initial
+     begin
+        clk = 1'b0;
+        forever #5 clk = ~clk;
+     end
+
+   // hard stop for the simulation
+   initial
+     begin
+        #800 $finish;
+     end
+
+   // stimulus: reset for 22 time units, then launch one division
+   initial
+     begin
+        #0  N = 64'h0;
+        #0  D = 64'h0;
+        #0  start = 1'b0;
+        #0  reset = 1'b1;
+        #22 reset = 1'b0;
+        //#25 N = 64'h0000_0000_9830_07C0;
+        //#0 D = 64'h0000_0000_0000_000C;
+        #25 N = 64'h0000_0000_06b9_7b0d;
+        #0  D = 64'h0000_0000_46df_998d;
+        #0  start = 1'b1;
+        #50 start = 1'b0;
+
+     end
+
+endmodule // tb
diff --git a/wally-pipelined/src/muldiv/div/test_iter32.sv b/wally-pipelined/src/muldiv/div/test_iter32.sv
new file mode 100755
index 000000000..94a42c211
--- /dev/null
+++ b/wally-pipelined/src/muldiv/div/test_iter32.sv
@@ -0,0 +1,74 @@
+// Random-vector testbench for the 32-bit divider.  Each vector drives N and
+// D, pulses start, waits a fixed number of clocks, then logs the DUT's Q and
+// rem0 beside a behavioral N/D and N%D, with 1/0 match flags, to iter32.out.
+module tb;
+
+   logic [31:0] N, D;
+   logic        clk;
+   logic        reset;
+   logic        start;
+
+   logic [31:0] Q;
+   logic [31:0] rem0;
+   logic        div0;
+   logic        done;
+   logic        divdone;
+
+   integer      handle3;
+   integer      desc3;
+   integer      i;
+
+   bit [31:0]   Ncomp;
+   bit [31:0]   Dcomp;
+   bit [31:0]   Qcomp;
+   bit [31:0]   Rcomp;
+
+   int32div dut (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
+
+   // free-running clock, period 10 time units
+   initial
+     begin
+        clk = 1'b0;
+        forever #5 clk = ~clk;
+     end
+
+   // open the result log and bound the length of the run
+   initial
+     begin
+        handle3 = $fopen("iter32.out");
+        #8000000 $finish;
+     end
+
+   // NOTE: this is an always block, so once the two-vector loop finishes the
+   // whole sequence (30-unit reset pulse + two vectors) restarts on the next
+   // clock edge and keeps repeating until $finish fires.
+   always @(posedge clk, posedge reset)
+     begin
+        desc3 = handle3;
+        #0  start = 1'b0;
+        #0  reset = 1'b1;
+        #30 reset = 1'b0;
+        for (i=0; i<2; i=i+1)
+          begin
+             N = $random;
+             D = $random;
+             start <= 1'b1;
+             // Wait 2 cycles (to be sure)
+             repeat (2)
+               @(posedge clk);
+             start <= 1'b0;
+             // presumably long enough for the divide to complete - confirm against the DUT
+             repeat (25)
+               @(posedge clk);
+             // 2-state copies of the operands feed the behavioral reference result
+             Ncomp = N;
+             Dcomp = D;
+             Qcomp = Ncomp/Dcomp;
+             Rcomp = Ncomp%Dcomp;
+             $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                       N, D, Q, rem0, Qcomp, Rcomp,
+                       (Q==Qcomp), (rem0==Rcomp));
+          end // for (i=0; i<2, i=i+1)
+     end
+
+endmodule // tb
diff --git a/wally-pipelined/src/muldiv/div/test_iter64.sv b/wally-pipelined/src/muldiv/div/test_iter64.sv
new file mode 100755
index 000000000..0674d8665
--- /dev/null
+++ b/wally-pipelined/src/muldiv/div/test_iter64.sv
@@ -0,0 +1,76 @@
+// Random-vector testbench for the 64-bit divider.  Each vector drives N and
+// D, pulses start, waits a fixed number of clocks, then logs the DUT's Q and
+// rem0 beside a behavioral N/D and N%D, with 1/0 match flags, to iter64.out.
+module tb;
+
+   logic [63:0] N, D;
+   logic        clk;
+   logic        reset;
+   logic        start;
+
+   logic [63:0] Q;
+   logic [63:0] rem0;
+   logic        div0;
+   logic        done;
+   logic        divdone;
+
+   integer      handle3;
+   integer      desc3;
+   integer      i;
+
+   bit [63:0]   Ncomp;
+   bit [63:0]   Dcomp;
+   bit [63:0]   Qcomp;
+   bit [63:0]   Rcomp;
+
+   int64div dut (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
+
+   // free-running clock, period 10 time units
+   initial
+     begin
+        clk = 1'b0;
+        forever #5 clk = ~clk;
+     end
+
+   // open the result log and bound the length of the run
+   initial
+     begin
+        handle3 = $fopen("iter64.out");
+        #8000000 $finish;
+     end
+
+   // NOTE: this is an always block, so once the two-vector loop finishes the
+   // whole sequence (30-unit reset pulse + two vectors) restarts on the next
+   // clock edge and keeps repeating until $finish fires.
+   always @(posedge clk, posedge reset)
+     begin
+        desc3 = handle3;
+        #0  start = 1'b0;
+        #0  reset = 1'b1;
+        #30 reset = 1'b0;
+        for (i=0; i<2; i=i+1)
+          begin
+             // $random is only 32 bits wide; concatenate two draws so all 64
+             // operand bits are random rather than a sign-extended 32-bit value
+             N = {$random, $random};
+             D = {$random, $random};
+             start <= 1'b1;
+             // Wait 2 cycles (to be sure)
+             repeat (2)
+               @(posedge clk);
+             start <= 1'b0;
+             // presumably long enough for the divide to complete - confirm against the DUT
+             repeat (41)
+               @(posedge clk);
+             // 2-state copies of the operands feed the behavioral reference result
+             Ncomp = N;
+             Dcomp = D;
+             Qcomp = Ncomp/Dcomp;
+             Rcomp = Ncomp%Dcomp;
+             $fdisplay(desc3, "%h %h %h %h || %h %h || %b %b",
+                       N, D, Q, rem0, Qcomp, Rcomp,
+                       (Q==Qcomp), (rem0==Rcomp));
+          end // for (i=0; i<2, i=i+1)
+     end
+
+endmodule // tb
diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv
index 3cd13ab2d..c0a906c92 100644
--- a/wally-pipelined/src/muldiv/muldiv.sv
+++ b/wally-pipelined/src/muldiv/muldiv.sv
@@ -48,6 +48,19 @@ module muldiv (
 
   mul mul(.*);
 
+  // Integer divider instance, selected by WIDTH (integration in progress).
+  // NOTE(review): start is left unconnected and div0/done/divdone are not consumed yet;
+  // the div/ testbenches instantiate int32div/int64div - confirm these module names.
+  if (WIDTH==32) begin
+    divide4x32 div(.clk(clk), .reset(reset),
+                   .N(SrcAE), .D(SrcBE), .Q(QuotE), .rem0(RemE),
+                   .start(), .div0(), .done(), .divdone());
+  end else begin // WIDTH=64
+    divide4x64 div(.clk(clk), .reset(reset),
+                   .N(SrcAE), .D(SrcBE), .Q(QuotE), .rem0(RemE),
+                   .start(), .div0(), .done(), .divdone());
+  end
+
   // Select result
   always_comb
     case (Funct3E)