Radix-4 divide/sqrt: add initC, a j1 flag and S-based table indexing in qsel4.

What this patch changes (as visible in the hunks below):
  * fdivsqrtiter.sv: declares initC and drives it from a generate-if on
    `RADIX. For `RADIX == 2 it is {1'b1, zeros}, the same constant Cmux was
    previously given directly; otherwise it is {2'b11, zeros}. Cmux now takes
    initC as its DivStart input. In the non-radix-2 stage branch a local j1
    is computed as (i == 0 & C[0][`DIVb-2] & ~C[0][`DIVb-3]) and connected to
    fdivsqrtstage4.
  * fdivsqrtstage4.sv: gains input j1, slices Smsbs = S[`DIVb:`DIVb-4] and
    passes both to qsel4.
  * qsel.sv (qsel4): gains inputs Smsbs and j1. The upper 3 bits of the QSel4
    index become A instead of Dmsbs. With Sqrt set, A is 3'b101 when j1,
    3'b111 when Smsbs == 5'b10000, else Smsbs[2:0]. With Sqrt clear, A is
    Dmsbs, i.e. the index used before this patch.

Reviewer notes:
  * NOTE(review): the j1 expression carries the author's own marker
    "not quite right ***"; it is reproduced unchanged and still needs to be
    resolved by someone who can confirm the intended condition.
  * NOTE(review): this copy of the patch had lost its line breaks. Records
    were put back one per line and checked against the @@ hunk line counts.
    Indentation, the split of the long port lists and the exact position of
    blank context lines are reconstructed, so context may not match the
    target files byte-for-byte; try `git apply --ignore-whitespace` first.

diff --git a/pipelined/src/fpu/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrtiter.sv
index bd336fc4..8835b59f 100644
--- a/pipelined/src/fpu/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrtiter.sv
@@ -74,6 +74,8 @@ module fdivsqrtiter(
   logic [`DIVb:0] SNext[`DIVCOPIES-1:0];// U1.b
   logic [`DIVb:0] SMNext[`DIVCOPIES-1:0];// U1.b
   logic [`DIVb-1:0] C[`DIVCOPIES-1:0]; // 0.b
+  logic [`DIVb-1:0] initC; // 0.b
+
   /* verilator lint_on UNOPTFLAT */
   logic [`DIVb+3:0] WSN, WCN; // Q4.N-1
   logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1
@@ -100,6 +102,8 @@ module fdivsqrtiter(
     assign NextC = {2'b11, C[`DIVCOPIES-1][`DIVb-1:2]};
   end
 
+  if (`RADIX == 2) assign initC = {1'b1, {(`DIVb-1){1'b0}}}; // *** note that these are preshifted right by r compared to book
+  else assign initC = {2'b11, {(`DIVb-2){1'b0}}};
 
 // mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN);
   mux2 #(`DIVb+4) wsmux(NextWSN, {{3{SqrtE&~XZeroE}}, X}, DivStart, WSN);
@@ -107,7 +111,7 @@ module fdivsqrtiter(
   mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN);
   flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
   flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D);
-  mux2 #(`DIVb) Cmux(NextC, {1'b1, {(`DIVb-1){1'b0}}}, DivStart, CMux);
+  mux2 #(`DIVb) Cmux(NextC, initC, DivStart, CMux);
   flopen #(`DIVb) cflop(clk, DivStart|DivBusy, CMux, C[0]);
 
   // Divisor Selections
@@ -127,7 +131,9 @@ module fdivsqrtiter(
         .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]),
         .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]), .qn(qn[i]));
       end else begin: stage
-        fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM,
+        logic j1;
+        assign j1 = (i == 0 & C[0][`DIVb-2] & ~C[0][`DIVb-3]); // not quite right ***
+        fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1,
         .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]),
         .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]), .qn(qn[i]));
       end
diff --git a/pipelined/src/fpu/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrtstage4.sv
index a8457b95..d59be8ae 100644
--- a/pipelined/src/fpu/fdivsqrtstage4.sv
+++ b/pipelined/src/fpu/fdivsqrtstage4.sv
@@ -38,7 +38,7 @@ module fdivsqrtstage4 (
   input logic [`DIVb:0] S, SM,
   input logic [`DIVb+3:0] WS, WC,
   input logic [`DIVb-1:0] C,
-  input logic SqrtM,
+  input logic SqrtM, j1,
   output logic [`DIVb:0] QNext, QMNext,
   output logic qn,
   output logic [`DIVb:0] SNext, SMNext,
@@ -50,6 +50,7 @@ module fdivsqrtstage4 (
   logic [3:0] q;
   logic [`DIVb+3:0] F;
   logic [`DIVb+3:0] AddIn;
+  logic [4:0] Smsbs;
 
   // Qmient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
@@ -59,7 +60,8 @@ module fdivsqrtstage4 (
   // 0000 = 0
   // 0010 = -1
   // 0001 = -2
-  qsel4 qsel4(.D, .WS, .WC, .Sqrt(SqrtM), .q);
+  assign Smsbs = S[`DIVb:`DIVb-4];
+  qsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .q);
   fgen4 fgen4(.s(q), .C({4'b1111, C}), .S({3'b000, S}), .SM({3'b000, SM}), .F);
 
   always_comb
diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv
index 9b6b3611..031cd9e8 100644
--- a/pipelined/src/fpu/qsel.sv
+++ b/pipelined/src/fpu/qsel.sv
@@ -94,13 +94,15 @@ endmodule
 
 module qsel4 (
   input logic [`DIVN-2:0] D,
+  input logic [4:0] Smsbs,
   input logic [`DIVb+3:0] WS, WC,
-  input logic Sqrt,
+  input logic Sqrt, j1,
   output logic [3:0] q
 );
   logic [6:0] Wmsbs;
   logic [7:0] PreWmsbs;
-  logic [2:0] Dmsbs;
+  logic [2:0] Dmsbs, A;
+
   assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4];
   assign Wmsbs = PreWmsbs[7:1];
   assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}};
@@ -161,7 +163,13 @@ module qsel4 (
       endcase
     end
   end
-  assign q = QSel4[{Dmsbs,Wmsbs}];
+  always_comb
+    if (Sqrt) begin
+      if (j1) A = 3'b101;
+      else if (Smsbs == 5'b10000) A = 3'b111;
+      else A = Smsbs[2:0];
+    end else A = Dmsbs;
+  assign q = QSel4[{A,Wmsbs}];
 
 
 endmodule