From 362f6ea2e614981e73e38312bd00c08fb8c543ca Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Fri, 2 Apr 2021 08:20:44 -0500 Subject: [PATCH] Minor cleanup --- wally-pipelined/src/muldiv/div.sv~ | 1921 ----------------- wally-pipelined/src/muldiv/div/div.c~ | 21 - .../src/muldiv/div/shifters_div.sv~ | 106 - 3 files changed, 2048 deletions(-) delete mode 100755 wally-pipelined/src/muldiv/div.sv~ delete mode 100644 wally-pipelined/src/muldiv/div/div.c~ delete mode 100644 wally-pipelined/src/muldiv/div/shifters_div.sv~ diff --git a/wally-pipelined/src/muldiv/div.sv~ b/wally-pipelined/src/muldiv/div.sv~ deleted file mode 100755 index 0cb6b0554..000000000 --- a/wally-pipelined/src/muldiv/div.sv~ +++ /dev/null @@ -1,1921 +0,0 @@ -module int64div (Q, done, divdone, rem0, div0, N, D, clk, reset, start); - - input logic [63:0] N, D; - input logic clk; - input logic reset; - input logic start; - - output logic [63:0] Q; - output logic [63:0] rem0; - output logic div0; - output logic done; - output logic divdone; - - logic enable; - logic state0; - logic V; - logic [7:0] Num; - logic [5:0] P, NumIter, RemShift; - logic [63:0] op1, op2, op1shift, Rem5; - logic [64:0] Qd, Rd, Qd2, Rd2; - logic [3:0] quotient; - logic otfzero; - logic shiftResult; - - // Divider goes the distance to 37 cycles - // (thanks the evil divisor for D = 0x1) - // but could theoretically be stopped when - // divdone is asserted. The enable signal - // turns off register storage thus invalidating - // any future cycles. - - // Shift D, if needed (for integer) - // needed to allow qst to be in range for integer - // division [1,2) and allow integer divide to work. - // - // The V or valid bit can be used to determine if D - // is 0 and thus a divide by 0 exception. This div0 - // exception is given to FSM to tell the operation to - // quit gracefully. - - // div0 produced output errors have untested results - // (it is assumed the OS would handle some output) - - lz64 p1 (P, V, D); - shifter_l64 p2 (op2, D, P); - assign op1 = N; - assign div0 = ~V; - - // Brent-Kung adder chosen for the heck of it and - // since so small (maybe could have used a RCA) - - // #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0) - // v = 2 since \rho < 1 (add 4 to make sure its a ceil) - bk8 cpa1 (co1, Num, {2'b0, P}, - {5'h0, shiftResult, ~shiftResult, 1'b0}, 1'b0); - - // Determine whether need to add just Q/Rem - assign shiftResult = P[0]; - // div by 2 (ceil) - assign NumIter = Num[6:1]; - assign RemShift = P; - - // FSM to control integer divider - // assume inputs are postive edge and - // datapath (divider) is negative edge - fsm64 fsm1 (enablev, state0v, donev, divdonev, otfzerov, - start, div0, NumIter, ~clk, reset); - - flopr #(1) rega (~clk, reset, donev, done); - flopr #(1) regb (~clk, reset, divdonev, divdone); - flopr #(1) regc (~clk, reset, otfzerov, otfzero); - flopr #(1) regd (~clk, reset, enablev, enable); - flopr #(1) rege (~clk, reset, state0v, state0); - - // To obtain a correct remainder the last bit of the - // quotient has to be aligned with a radix-r boundary. - // Since the quotient is in the range 1/2 < q < 2 (one - // integer bit and m fractional bits), this is achieved by - // shifting N right by v+s so that (m+v+s) mod k = 0. And, - // the quotient has to be aligned to the integer position. - - // Used a Brent-Kung for no reason (just wanted prefix -- might - // have gotten away with a RCA) - - // Actual divider unit FIXME: r16 (jes) - divide4x64 p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); - - // Storage registers to hold contents stable - flopenr #(65) reg3 (clk, reset, enable, Rd, Rd2); - flopenr #(65) reg4 (clk, reset, enable, Qd, Qd2); - - // Probably not needed - just assigns results - assign Q = Qd2[63:0]; - assign Rem5 = Rd2[64:1]; - - // Adjust remainder by m (no need to adjust by - // n ln(r) - shifter_r64 p4 (rem0, Rem5, RemShift); - -endmodule // int32div - -module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, - enable, otfzero, shiftResult); - - input logic [63:0] op1, op2; - input logic clk, state0; - input logic reset; - input logic enable; - input logic otfzero; - input logic shiftResult; - - output logic [64:0] rem0; - output logic [64:0] Q; - output logic [3:0] quotient; - - logic [67:0] Sum, Carry; - logic [64:0] Qstar; - logic [64:0] QMstar; - logic [7:0] qtotal; - logic [67:0] SumN, CarryN, SumN2, CarryN2; - logic [67:0] divi1, divi2, divi1c, divi2c, dive1; - logic [67:0] mdivi_temp, mdivi; - logic zero; - logic [1:0] qsel; - logic [1:0] Qin, QMin; - logic CshiftQ, CshiftQM; - logic [67:0] rem1, rem2, rem3; - logic [67:0] SumR, CarryR; - logic [64:0] Qt; - - // Create one's complement values of Divisor (for q*D) - assign divi1 = {3'h0, op2, 1'b0}; - assign divi2 = {2'h0, op2, 2'b0}; - assign divi1c = ~divi1; - assign divi2c = ~divi2; - // Shift x1 if not mod k - mux2 #(68) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1); - - // I I I . F F F F F ... (Robertson Criteria - \rho * qmax * D) - mux2 #(68) mx2 ({CarryN2[65:0], 2'h0}, 68'h0, state0, CarryN); - mux2 #(68) mx3 ({SumN2[65:0], 2'h0}, dive1, state0, SumN); - // Simplify QST - adder #(8) cpa1 (SumN[67:60], CarryN[67:60], qtotal); - // q = {+2, +1, -1, -2} else q = 0 - qst4 pd1 (qtotal[7:1], divi1[63:61], quotient); - assign ulp = quotient[2]|quotient[3]; - assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]); - // Map to binary encoding - assign qsel[1] = quotient[3]|quotient[2]; - assign qsel[0] = quotient[3]|quotient[1]; - mux4 #(68) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp); - mux2 #(68) mx5 (mdivi_temp, 68'h0, zero, mdivi); - csa #(68) csa1 (mdivi, SumN, {CarryN[67:1], ulp}, Sum, Carry); - // regs : save CSA - flopenr #(68) reg1 (clk, reset, enable, Sum, SumN2); - flopenr #(68) reg2 (clk, reset, enable, Carry, CarryN2); - // OTF - ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM); - otf #(65) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, - otfzero, enable, Qstar, QMstar); - - // Correction and generation of Remainder - add68 cpa2 (cout1, rem1, SumN2[67:0], CarryN2[67:0], 1'b0); - // Add back +D as correction - csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR); - add68 cpa3 (cout2, rem2, SumR, CarryR, 1'b0); - // Choose remainder (Rem or Rem+D) - mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3); - // Choose correct Q or QM - mux2 #(65) mx7 (Qstar, QMstar, rem1[67], Qt); - // Final results - assign rem0 = rem3[64:0]; - assign Q = Qt; - -endmodule // divide4x64 - -module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM); - - input logic [3:0] quot; - - output logic [1:0] Qin; - output logic [1:0] QMin; - output logic CshiftQ; - output logic CshiftQM; - - assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]); - assign Qin[0] = (quot[1]) | (quot[2]); - assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]); - assign QMin[0] = (quot[3]) | (quot[0]) | - (!quot[3]&!quot[2]&!quot[1]&!quot[0]); - assign CshiftQ = (quot[1]) | (quot[0]); - assign CshiftQM = (quot[3]) | (quot[2]); - - endmodule - -module otf #(parameter WIDTH=8) - (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q); - - input logic [1:0] Qin, QMin; - input logic CshiftQ, CshiftQM; - input logic clk; - input logic reset; - input logic enable; - - output logic [WIDTH-1:0] R2Q; - output logic [WIDTH-1:0] R1Q; - - logic [WIDTH-1:0] Qstar, QMstar; - logic [WIDTH-1:0] M1Q, M2Q; - - // QM - mux2 #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q); - flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q); - // Q - mux2 #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q); - flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q); - - assign Qstar = R2Q; - assign QMstar = R1Q; - - endmodule // otf8 - - module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, - output logic [WIDTH-1:0] y); - - assign y = a + b; - - endmodule // adder - - module fa (input logic a, b, c, output logic sum, carry); - - assign sum = a^b^c; - assign carry = a&b|a&c|b&c; - - endmodule // fa - - module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, - output logic [WIDTH-1:0] sum, carry); - - logic [WIDTH:0] carry_temp; - genvar i; - generate - for (i=0;i B. LT and GT are both '0' if A = B. - -module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule // magcompare2b - -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. -// doi: 10.1109/ISCAS.2005.1464531 - -module magcompare8 (LT, EQ, A, B); - - input logic [7:0] A; - input logic [7:0] B; - - logic [3:0] s; - logic [3:0] t; - logic [1:0] u; - logic [1:0] v; - logic GT; - //wire LT; - - output logic EQ; - output logic LT; - - magcompare2b mag1 (s[0], t[0], A[1:0], B[1:0]); - magcompare2b mag2 (s[1], t[1], A[3:2], B[3:2]); - magcompare2b mag3 (s[2], t[2], A[5:4], B[5:4]); - magcompare2b mag4 (s[3], t[3], A[7:6], B[7:6]); - - magcompare2b mag5 (u[0], v[0], t[1:0], s[1:0]); - magcompare2b mag6 (u[1], v[1], t[3:2], s[3:2]); - - magcompare2b mag7 (LT, GT, v[1:0], u[1:0]); - - assign EQ = ~(GT | LT); - -endmodule // magcompare8 diff --git a/wally-pipelined/src/muldiv/div/div.c~ b/wally-pipelined/src/muldiv/div/div.c~ deleted file mode 100644 index e4927c34a..000000000 --- a/wally-pipelined/src/muldiv/div/div.c~ +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include -#include - -int main() { - - uint64_t N; - uint64_t D; - uint64_t Q; - - N = 0xc9649f05a8e1a8bb; - D = 0x82f6747f707af2c0; - - printf("N = %" PRIx64 "\n", N); - printf("D = %" PRIx64 "\n", D); - printf("Q = %" PRIx64 "\n", Q); - printf("R = %" PRIx64 "\n", N%D); - - - -} diff --git a/wally-pipelined/src/muldiv/div/shifters_div.sv~ b/wally-pipelined/src/muldiv/div/shifters_div.sv~ deleted file mode 100644 index 85c4e5b68..000000000 --- a/wally-pipelined/src/muldiv/div/shifters_div.sv~ +++ /dev/null @@ -1,106 +0,0 @@ -module shifter_l64 (Z, A, Shift); - - input logic [63:0] A; - input logic [5:0] Shift; - - logic [63:0] stage1; - logic [63:0] stage2; - logic [63:0] stage3; - logic [63:0] stage4; - logic [63:0] stage5; - logic [31:0] thirtytwozeros = 32'h0; - logic [15:0] sixteenzeros = 16'h0; - logic [ 7:0] eightzeros = 8'h0; - logic [ 3:0] fourzeros = 4'h0; - logic [ 1:0] twozeros = 2'b00; - logic onezero = 1'b0; - - output logic [63:0] Z; - - mux21x64 mx01(stage1, A, {A[31:0], thirtytwozeros}, Shift[5]); - mux21x64 mx02(stage2, stage1, {stage1[47:0], sixteenzeros}, Shift[4]); - mux21x64 mx03(stage3, stage2, {stage2[55:0], eightzeros}, Shift[3]); - mux21x64 mx04(stage4, stage3, {stage3[59:0], fourzeros}, Shift[2]); - mux21x64 mx05(stage5, stage4, {stage4[61:0], twozeros}, Shift[1]); - mux21x64 mx06(Z, stage5, {stage5[62:0], onezero}, Shift[0]); - -endmodule // shifter_l64 - -module shifter_r64 (Z, A, Shift); - - input logic [63:0] A; - input logic [5:0] Shift; - - logic [63:0] stage1; - logic [63:0] stage2; - logic [63:0] stage3; - logic [63:0] stage4; - logic [63:0] stage5; - logic [31:0] thirtytwozeros = 32'h0; - logic [15:0] sixteenzeros = 16'h0; - logic [ 7:0] eightzeros = 8'h0; - logic [ 3:0] fourzeros = 4'h0; - logic [ 1:0] twozeros = 2'b00; - logic onezero = 1'b0; - - output logic [63:0] Z; - - mux21x64 mx01(stage1, A, {thirtytwozeros, A[63:32]}, Shift[5]); - mux21x64 mx02(stage2, stage1, {sixteenzeros, stage1[63:16]}, Shift[4]); - mux21x64 mx03(stage3, stage2, {eightzeros, stage2[63:8]}, Shift[3]); - mux21x64 mx04(stage4, stage3, {fourzeros, stage3[63:4]}, Shift[2]); - mux21x64 mx05(stage5, stage4, {twozeros, stage4[63:2]}, Shift[1]); - mux21x64 mx06(Z, stage5, {onezero, stage5[63:1]}, Shift[0]); - -endmodule // shifter_r64 - -module shifter_l32 (Z, A, Shift); - - input logic [31:0] A; - input logic [4:0] Shift; - - logic [31:0] stage1; - logic [31:0] stage2; - logic [31:0] stage3; - logic [31:0] stage4; - logic [15:0] sixteenzeros = 16'h0; - logic [ 7:0] eightzeros = 8'h0; - logic [ 3:0] fourzeros = 4'h0; - logic [ 1:0] twozeros = 2'b00; - logic onezero = 1'b0; - - output logic [31:0] Z; - - mux21x32 mx01(stage1, A, {A[15:0], sixteenzeros}, Shift[4]); - mux21x32 mx02(stage2, stage1, {stage1[23:0], eightzeros}, Shift[3]); - mux21x32 mx03(stage3, stage2, {stage2[27:0], fourzeros}, Shift[2]); - mux21x32 mx04(stage4, stage3, {stage3[29:0], twozeros}, Shift[1]); - mux21x32 mx05(Z , stage4, {stage4[30:0], onezero}, Shift[0]); - -endmodule // shifter_l32 - -module shifter_r32 (Z, A, Shift); - - input logic [31:0] A; - input logic [4:0] Shift; - - logic [31:0] stage1; - logic [31:0] stage2; - logic [31:0] stage3; - logic [31:0] stage4; - logic [15:0] sixteenzeros = 16'h0; - logic [ 7:0] eightzeros = 8'h0; - logic [ 3:0] fourzeros = 4'h0; - logic [ 1:0] twozeros = 2'b00; - logic onezero = 1'b0; - - output logic [31:0] Z; - - mux21x32 mx01(stage1, A, {sixteenzeros, A[31:16]}, Shift[4]); - mux21x32 mx02(stage2, stage1, {eightzeros, stage1[31:8]}, Shift[3]); - mux21x32 mx03(stage3, stage2, {fourzeros, stage2[31:4]}, Shift[2]); - mux21x32 mx04(stage4, stage3, {twozeros, stage3[31:2]}, Shift[1]); - mux21x32 mx05(Z , stage4, {onezero, stage4[31:1]}, Shift[0]); - -endmodule // shifter_r32 -