/////////////////////////////////////////// // intdiv.sv // // Written: James.Stine@okstate.edu 1 February 2021 // Modified: // // Purpose: Integer Divide instructions // // A component of the Wally configurable RISC-V project. // // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software // is furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////// // *** I added these verilator controls to clean up the // lint output. The linter warnings should be fixed, but now the output is at // least readable. 
/* verilator lint_off COMBDLY */
/* verilator lint_off IMPLICIT */

`include "idiv-config.vh"

// intdiv: top level of the radix-4 integer divider.
//
// Ports:
//   N, D   - dividend and divisor (WIDTH bits each)
//   S      - 1 selects signed operation, 0 unsigned
//   start  - handed to the control FSM
//   Qf     - final quotient (after sign and exception handling)
//   remf   - final remainder (after sign and exception handling)
//   div0   - asserted when lod_hier reports no valid bit in |D|
//   done   - registered "done" output of the control FSM
//
// NOTE(review): lod_hier, shift_left, shift_right, flopr, flopenr, mux2,
// mux4, fsm64 and qst4 are instantiated below but are not defined in this
// listing; their port order is taken as-is from the instantiations.
module intdiv #(parameter WIDTH=64)
   (Qf, done, remf, div0, N, D, clk, reset, start, S);

   input  logic [WIDTH-1:0]  N, D;
   input  logic              clk;
   input  logic              reset;
   input  logic              start;
   input  logic              S;

   output logic [WIDTH-1:0]  Qf;
   output logic [WIDTH-1:0]  remf;
   output logic              div0;
   output logic              done;

   logic                     enable;
   logic                     state0;
   logic                     V;
   logic [$clog2(WIDTH):0]   Num;
   logic [$clog2(WIDTH)-1:0] P, NumIter, RemShift, RemShiftP;
   logic [WIDTH-1:0]         op1, op2, Rem5;
   logic [WIDTH:0]           Qd, Rd, Qd2, Rd2;
   logic [WIDTH:0]           Q2d, Qd3;
   logic [WIDTH-1:0]         Q, Q2, rem0;
   logic [3:0]               quotient;
   logic                     otfzero;
   logic                     shiftResult;
   logic [WIDTH-1:0]         twoD;
   logic [WIDTH-1:0]         twoN;
   logic                     SignD;
   logic                     SignN;
   logic [WIDTH-1:0]         QT, remT;
   logic                     D_NegOne;
   logic                     Max_N;
   logic [1:0]               QR;
   logic                     tcQ, tcR;

   // Raw FSM outputs. These were implicit nets in the original; they are
   // declared explicitly so the design no longer relies on implicit
   // net creation.
   logic                     enablev, state0v, donev, otfzerov;

   // Check if negative (two's complement).
   // If so (and the operation is signed), convert to positive:
   // invert every bit and add one.
   adder #(WIDTH) cpa1 ((D ^ {WIDTH{D[WIDTH-1]&S}}),
                        {{WIDTH-1{1'b0}}, D[WIDTH-1]&S}, twoD);
   adder #(WIDTH) cpa2 ((N ^ {WIDTH{N[WIDTH-1]&S}}),
                        {{WIDTH-1{1'b0}}, N[WIDTH-1]&S}, twoN);
   assign SignD = D[WIDTH-1];
   assign SignN = N[WIDTH-1];

   // Max N and D = -1 (Overflow)
   assign Max_N    = (~|N[WIDTH-2:0]) & N[WIDTH-1];
   assign D_NegOne = &D;

   // Divider goes the distance to 37 cycles
   // (thanks to the evil divisor for D = 0x1)
   // The enable signal turns off register storage thus invalidating
   // any future cycles.

   // Shift D, if needed (for integer)
   // needed to allow qst to be in range for integer
   // division [1,2) and allow integer divide to work.
   //
   // The V or valid bit can be used to determine if D
   // is 0 and thus a divide by 0 exception.  This div0
   // exception is given to FSM to tell the operation to
   // quit gracefully.
   lod_hier   #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD));
   shift_left #(WIDTH) p2 (twoD, P, op2);
   assign op1  = twoN;
   assign div0 = ~V;

   // #iter: N = m+v+s = m+2+s (mod k = 0)
   // v = 2 since \rho < 1 (add 4 to make sure its a ceil)
   // k = 2 (r = 2^k)
   // The second operand is 4 when P is odd and 2 when P is even.
   adder #($clog2(WIDTH)+1) cpa3 ({1'b0, P},
                                  {{$clog2(WIDTH)+1-3{1'b0}}, shiftResult, ~shiftResult, 1'b0},
                                  Num);

   // Determine whether need to add just Q/Rem
   assign shiftResult = P[0];
   // div by 2 (ceil)
   assign NumIter = Num[$clog2(WIDTH):1];

   assign RemShift = P;

   // Avoid critical path of RemShift
   flopr #($clog2(WIDTH)) reg1 (clk, reset, RemShift, RemShiftP);

   // FSM to control integer divider.
   // The FSM and the four registers on its outputs are clocked with ~clk;
   // the datapath (divide4 and the result registers below) is clocked
   // with clk.
   fsm64 #($clog2(WIDTH)) fsm1 (enablev, state0v, donev, otfzerov,
                                start, div0, NumIter, ~clk, reset);

   flopr #(1) rega (~clk, reset, donev,    done);
   flopr #(1) regc (~clk, reset, otfzerov, otfzero);
   flopr #(1) regd (~clk, reset, enablev,  enable);
   flopr #(1) rege (~clk, reset, state0v,  state0);

   // To obtain a correct remainder the last bit of the
   // quotient has to be aligned with a radix-r boundary.
   // Since the quotient is in the range 1/2 < q < 2 (one
   // integer bit and m fractional bits), this is achieved by
   // shifting N right by v+s so that (m+v+s) mod k = 0.  And,
   // the quotient has to be aligned to the integer position.
   divide4 #(WIDTH) p3 (Qd, Q2d, Rd, quotient, op1, op2, clk, reset, state0,
                        enable, otfzero, shiftResult);

   // Storage registers to hold contents stable
   flopenr #(WIDTH+1) reg3 (clk, reset, enable, Rd,  Rd2);
   flopenr #(WIDTH+1) reg4 (clk, reset, enable, Qd,  Qd2);
   flopenr #(WIDTH+1) reg5 (clk, reset, enable, Q2d, Qd3);

   // Probably not needed - just assigns results
   assign Q    = Qd2[WIDTH-1:0];
   assign Rem5 = Rd2[WIDTH:1];
   assign Q2   = Qd3[WIDTH-1:0];   // Q2 is not read anywhere in this module

   // Adjust the remainder: shift it right by the registered copy of P,
   // the same amount the divisor was shifted left by above.
   shift_right #(WIDTH) p4 (Rem5, RemShiftP, rem0);

   // Adjust Q/Rem for Signed.
   // Selector is {S, SignN, SignD}; QR is {tcQ, tcR}.
   // When Dividend (N) and/or Divisor (D) are negative (first bit is '1'):
   //   - When N and D are negative: Remainder is negative
   //     (undergoes a two's complement).
   //   - When N is negative: Quotient and Remainder are both negative
   //     (undergo a two's complement).
   //   - When D is negative: Quotient is negative
   //     (undergoes a two's complement).
   // Every unsigned combination, and signed with both operands
   // non-negative, leaves both results untouched.
   // Plain `case` is used (the table has no wildcard entries), so an
   // unknown selector bit falls through to the default instead of
   // silently matching an item.
   always_comb
     case ({S, SignN, SignD})
       3'b101  : QR = 2'b10;
       3'b110  : QR = 2'b11;
       3'b111  : QR = 2'b01;
       default : QR = 2'b00;
     endcase // case ({S, SignN, SignD})
   assign {tcQ, tcR} = QR;

   // Conditional two's complement of remainder and quotient
   adder #(WIDTH) cpa4 ((rem0 ^ {WIDTH{tcR}}), {{WIDTH-1{1'b0}}, tcR}, remT);
   adder #(WIDTH) cpa5 ((Q ^ {WIDTH{tcQ}}), {{WIDTH-1{1'b0}}, tcQ}, QT);

   // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec)
   exception_int #(WIDTH) exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf);

endmodule // intdiv

// Division by Recurrence (r=4)
//
// Ports:
//   op1, op2    - dividend and (already shifted) divisor magnitudes
//   state0      - when 1, the recurrence is loaded with the dividend
//                 instead of the previous partial remainder
//   enable      - register enable for the partial remainder and OTF registers
//   otfzero     - connected to the reset port of the on-the-fly converter
//   shiftResult - selects which alignment of op1 is loaded
//   Q, rem0     - selected quotient and corrected remainder
//   quotient    - one-hot quotient digit from qst4; bit order [3:0] is
//                 {+2, +1, -1, -2}, all zero meaning digit 0
//
// NOTE(review): output Q2 is declared but never driven in this module.
// It is left as in the original because the intended source is not
// evident from the code.
module divide4 #(parameter WIDTH=64)
   (Q, Q2, rem0, quotient, op1, op2, clk, reset, state0,
    enable, otfzero, shiftResult);

   input  logic [WIDTH-1:0] op1, op2;
   input  logic             clk, state0;
   input  logic             reset;
   input  logic             enable;
   input  logic             otfzero;
   input  logic             shiftResult;

   output logic [WIDTH:0]   rem0;
   output logic [WIDTH:0]   Q;
   output logic [WIDTH:0]   Q2;
   output logic [3:0]       quotient;

   logic [WIDTH+3:0]        Sum, Carry;
   logic [WIDTH:0]          Qstar;
   logic [WIDTH:0]          QMstar;
   logic [7:0]              qtotal;
   logic [WIDTH+3:0]        SumN, CarryN, SumN2, CarryN2;
   logic [WIDTH+3:0]        divi1, divi2, divi1c, divi2c, dive1;
   logic [WIDTH+3:0]        mdivi_temp, mdivi;
   logic                    zero;
   logic                    ulp;     // was an implicit net in the original
   logic [1:0]              qsel;
   logic [1:0]              Qin, QMin;
   logic                    CshiftQ, CshiftQM;
   logic [WIDTH+3:0]        rem1, rem2, rem3;
   logic [WIDTH+3:0]        SumR, CarryR;
   logic [WIDTH:0]          Qt;

   // Create one's complement values of Divisor (for q*D)
   assign divi1  = {3'h0, op2, 1'b0};
   assign divi2  = {2'h0, op2, 2'b0};
   assign divi1c = ~divi1;
   assign divi2c = ~divi2;

   // Shift x1 if not mod k
   mux2 #(WIDTH+4) mx1 ({3'b000, op1, 1'b0}, {4'h0, op1}, shiftResult, dive1);

   // I I I . F F F F F ... (Robertson Criteria - \rho * qmax * D)
   // Partial remainder in carry-save form, shifted left by two each
   // iteration; state0 selects the initial load instead.
   mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN);
   mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN);

   // Simplify QST: add only the top 8 bits of the carry-save pair
   adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal);

   // q = {+2, +1, -1, -2} else q = 0
   qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient);

   // ulp supplies the +1 that completes the two's complement when a
   // complemented divisor multiple (divi1c/divi2c) is selected.
   assign ulp  = quotient[2]|quotient[3];
   assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]);

   // Map to binary encoding
   assign qsel[1] = quotient[3]|quotient[2];
   assign qsel[0] = quotient[3]|quotient[1];
   mux4 #(WIDTH+4) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp);
   mux2 #(WIDTH+4) mx5 (mdivi_temp, {WIDTH+4{1'b0}}, zero, mdivi);
   csa  #(WIDTH+4) csa1 (mdivi, SumN, {CarryN[WIDTH+3:1], ulp}, Sum, Carry);

   // regs : save CSA
   flopenr #(WIDTH+4) reg1 (clk, reset, enable, Sum, SumN2);
   flopenr #(WIDTH+4) reg2 (clk, reset, enable, Carry, CarryN2);

   // OTF
   ls_control     otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM);
   otf #(WIDTH+1) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk,
                        otfzero, enable, Qstar, QMstar);

   // Correction and generation of Remainder
   adder #(WIDTH+4) cpa2 (SumN2[WIDTH+3:0], CarryN2[WIDTH+3:0], rem1);
   // Add back +D as correction
   csa   #(WIDTH+4) csa2 (CarryN2[WIDTH+3:0], SumN2[WIDTH+3:0], divi1, SumR, CarryR);
   adder #(WIDTH+4) cpa3 (SumR, CarryR, rem2);

   // Choose remainder (Rem or Rem+D), keyed on the sign bit of rem1
   mux2 #(WIDTH+4) mx6 (rem1, rem2, rem1[WIDTH+3], rem3);
   // Choose correct Q or QM, keyed on the same sign bit
   mux2 #(WIDTH+1) mx7 (Qstar, QMstar, rem1[WIDTH+3], Qt);

   // Final results
   assign rem0 = rem3[WIDTH:0];
   assign Q    = Qt;

endmodule // divide4

// Load/shift control for the on-the-fly converter.
// Translates the one-hot quotient digit into the two bits appended to Q
// (Qin) and to QM (QMin), plus the two source-select controls.
module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM);

   input  logic [3:0] quot;

   output logic [1:0] Qin;
   output logic [1:0] QMin;
   output logic       CshiftQ;
   output logic       CshiftQM;

   logic [5:0]        qout;   // {Qin, QMin, CshiftQ, CshiftQM}

   // q = {+2, +1, -1, -2}
   // Any non-one-hot (or unknown) digit yields X so the error is visible
   // in simulation. Plain `case` is used: the table has no wildcards.
   always_comb
     case (quot)
       4'b0000 : qout = 6'b00_11_0_0;   // q =  0
       4'b0001 : qout = 6'b10_01_1_0;   // q = -2
       4'b0010 : qout = 6'b11_10_1_0;   // q = -1
       4'b0100 : qout = 6'b01_00_0_1;   // q = +1
       4'b1000 : qout = 6'b10_01_0_1;   // q = +2
       default : qout = 6'bxx_xx_x_x;
     endcase // case (quot)

   assign {Qin, QMin, CshiftQ, CshiftQM} = qout;

endmodule // ls_control

// On-the-fly Conversion per Ercegovac/Lang
//
// Two registers are kept: R2Q (Q) and R1Q (QM). Each cycle a register is
// reloaded with one of the two registers shifted left by two, with the
// 2-bit digit from ls_control appended.
// NOTE(review): assumes mux2(d0, d1, s, y) selects d1 when s is 1 - mux2
// is not defined in this listing; confirm.
module otf #(parameter WIDTH=8)
   (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q);

   input  logic [1:0]       Qin, QMin;
   input  logic             CshiftQ, CshiftQM;
   input  logic             clk;
   input  logic             reset;
   input  logic             enable;

   output logic [WIDTH-1:0] R2Q;
   output logic [WIDTH-1:0] R1Q;

   logic [WIDTH-1:0]        Qstar, QMstar;
   logic [WIDTH-1:0]        M1Q, M2Q;

   // QM
   mux2    #(WIDTH) m1 (QMstar, Qstar, CshiftQM, M1Q);
   flopenr #(WIDTH) r1 (clk, reset, enable, {M1Q[WIDTH-3:0], QMin}, R1Q);

   // Q
   mux2    #(WIDTH) m2 (Qstar, QMstar, CshiftQ, M2Q);
   flopenr #(WIDTH) r2 (clk, reset, enable, {M2Q[WIDTH-3:0], Qin}, R2Q);

   assign Qstar  = R2Q;
   assign QMstar = R1Q;

endmodule // otf

// Carry-propagate adder: y = a + b, truncated to WIDTH bits.
module adder #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0] a, b,
    output logic [WIDTH-1:0] y);

   assign y = a + b;

endmodule // adder

// 1-bit full adder.
module fa (input  logic a, b, c,
           output logic sum, carry);

   assign sum   = a^b^c;
   assign carry = a&b|a&c|b&c;

endmodule // fa

// Carry-save adder: reduces three WIDTH-bit operands to a sum vector and
// a carry vector. The carry vector is shifted left by one with a 0 in
// bit 0.
//
// NOTE(review): in the text this block was recovered from, everything
// after "for (i=0;i" up to the magcompare2b header comment was missing.
// The loop body and carry assignment below are reconstructed from the
// surviving declarations (carry_temp[WIDTH:0], genvar i), the fa cell
// above, and the way divide4 replaces bit 0 of the carry operand with
// ulp. Confirm against the upstream file, and check whether other module
// definitions were lost in the same gap.
module csa #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0] a, b, c,
    output logic [WIDTH-1:0] sum, carry);

   logic [WIDTH:0] carry_temp;
   genvar          i;

   generate
      for (i = 0; i < WIDTH; i = i + 1) begin : genbit
         fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
      end
   endgenerate

   assign carry = {carry_temp[WIDTH-1:1], 1'b0};

endmodule // csa

// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1' if A > B. LT and GT are both '0' if A = B.
module magcompare2b (LT, GT, A, B);

   input  logic [1:0] A;
   input  logic [1:0] B;

   output logic       LT;
   output logic       GT;

   // Determine if A < B using a minimized sum-of-products expression
   assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];

   // Determine if A > B using a minimized sum-of-products expression
   assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];

endmodule // magcompare2b

// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
//
// 8-bit magnitude comparator built as a three-level tree of 2-bit
// comparators. LT is the final "A < B" flag; EQ is asserted when neither
// the final less-than nor the final greater-than flag is set.
module magcompare8 (LT, EQ, A, B);

   input  logic [7:0] A;
   input  logic [7:0] B;

   output logic       EQ;
   output logic       LT;

   logic [3:0]        sliceLT;   // per 2-bit slice: A slice < B slice
   logic [3:0]        sliceGT;   // per 2-bit slice: A slice > B slice
   logic [1:0]        halfLT;    // per nibble: less-than
   logic [1:0]        halfGT;    // per nibble: greater-than
   logic              GT;

   // Level 1: compare the operands two bits at a time
   magcompare2b mag1 (.LT(sliceLT[0]), .GT(sliceGT[0]), .A(A[1:0]), .B(B[1:0]));
   magcompare2b mag2 (.LT(sliceLT[1]), .GT(sliceGT[1]), .A(A[3:2]), .B(B[3:2]));
   magcompare2b mag3 (.LT(sliceLT[2]), .GT(sliceGT[2]), .A(A[5:4]), .B(B[5:4]));
   magcompare2b mag4 (.LT(sliceLT[3]), .GT(sliceGT[3]), .A(A[7:6]), .B(B[7:6]));

   // Level 2: compare the greater-than flags against the less-than flags,
   // one pair of slices at a time
   magcompare2b mag5 (.LT(halfLT[0]), .GT(halfGT[0]),
                      .A(sliceGT[1:0]), .B(sliceLT[1:0]));
   magcompare2b mag6 (.LT(halfLT[1]), .GT(halfGT[1]),
                      .A(sliceGT[3:2]), .B(sliceLT[3:2]));

   // Level 3: same trick on the two nibble results
   magcompare2b mag7 (.LT(LT), .GT(GT),
                      .A(halfGT[1:0]), .B(halfLT[1:0]));

   assign EQ = ~GT & ~LT;

endmodule // magcompare8

// Final result selection for the integer divider.
//
// Selector {div0, S, Max_N, D_NegOne}:
//   - div0 set (any other bits): Qf is all ones, remf is op1.
//   - div0 clear with S, Max_N and D_NegOne all set:
//     Qf is 1 followed by zeros, remf is zero.
//   - anything else (including an unknown selector): Q and rem pass through.
module exception_int #(parameter WIDTH=8)
   (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf);

   input  logic [WIDTH-1:0] Q;
   input  logic [WIDTH-1:0] rem;
   input  logic [WIDTH-1:0] op1;
   input  logic             S;
   input  logic             div0;
   input  logic             Max_N;
   input  logic             D_NegOne;

   output logic [WIDTH-1:0] Qf;
   output logic [WIDTH-1:0] remf;

   always_comb
     case ({div0, S, Max_N, D_NegOne})
       // signed overflow
       4'b0111 :
         begin
            Qf   = {1'b1, {WIDTH-1{1'h0}}};
            remf = {WIDTH{1'h0}};
         end
       // divide by zero, regardless of the other three flags
       4'b1000, 4'b1001, 4'b1010, 4'b1011,
       4'b1100, 4'b1101, 4'b1110, 4'b1111 :
         begin
            Qf   = {WIDTH{1'b1}};
            remf = op1;
         end
       // no exception
       default :
         begin
            Qf   = Q;
            remf = rem;
         end
     endcase

endmodule // exception_int

/* verilator lint_on COMBDLY */
/* verilator lint_on IMPLICIT */