diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv
index 20f88b6cb..30232a232 100644
--- a/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -83,7 +83,7 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
   // Initialize C to -1 for sqrt and -R for division
   logic [1:0] initCUpper;
   if(P.RADIX == 4) begin
-    mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper);
+    mux2 #(2) cuppermux4(2'b00, 2'b00, SqrtE, initCUpper); // *** Remove this soon
   end else begin
     mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper);
   end
@@ -108,9 +108,10 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
         .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
         .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
     end else begin: stage
-      logic j1;
-      assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
-      fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
+      logic j1,j0;
+      assign j0 = (i == 0 & ~C[0][P.DIVb+1]);
+      assign j1 = (i == 1 & ~C[0][P.DIVb+1]);
+      fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .j0,
         .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
         .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
     end
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index cc77c47d0..e81f5c872 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -174,7 +174,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
 
   logic [P.DIVb:0] PreSqrtX;
   assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
-  mux2 #(P.DIVb+1) sqrtxmux({1'b0,Xnorm[P.DIVb:1]}, {1'b00, Xnorm[P.DIVb:2]}, EvenExp, PreSqrtX); // X/2 if exponent odd, X/4 if exponent even
+  mux2 #(P.DIVb+1) sqrtxmux({1'b0,Xnorm[P.DIVb:1]}, {2'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even
 
   /*
   // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv
index 0d7a722ff..e7df4399d 100644
--- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@@ -32,7 +32,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
   input  logic [P.DIVb:0]   U,UM,          // U1.DIVb
   input  logic [P.DIVb+3:0] WS, WC,        // Q4.DIVb
   input  logic [P.DIVb+1:0] C,             // Q2.DIVb
-  input  logic              SqrtE, j1,
+  input  logic              SqrtE, j1,j0,
   output logic [P.DIVb+1:0] CNext,         // Q2.DIVb
   output logic              un,
   output logic [P.DIVb:0]   UNext, UMNext, // U1.DIVb
@@ -54,7 +54,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
   assign Dmsbs  = D[P.DIVb-1:P.DIVb-3];    // U0.3 most significant fractional bits of divisor after leading 1
   assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];   // Q4.4 most significant bits of residual
   assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];   // Q4.4 most significant bits of residual
-  fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
+  fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .j0, .udigit);
   assign un = 1'b0; // unused for radix 4
 
   // F generation logic
diff --git a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv
index c0cbe9b1c..69571b105 100644
--- a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv
@@ -31,7 +31,7 @@ module fdivsqrtuslc4cmp (
   input  logic [2:0] Dmsbs,          // U0.3 fractional bits after implicit leading 1
   input  logic [4:0] Smsbs,          // U1.4 leading bits of square root approximation
   input  logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits
-  input  logic       SqrtE, j1,
+  input  logic       SqrtE, j0, j1,
   output logic [3:0] udigit          // {2, 1, -1, -2} digit is 0 if none are hot
 );
   logic [6:0] Wmsbs;
@@ -46,7 +46,9 @@ module fdivsqrtuslc4cmp (
   // Wmsbs = | |
 
   logic [6:0] mk2, mk1, mk0, mkm1;
+  logic [6:0] mkj2, mkj1, mkj0, mkjm1;
   logic [6:0] mks2[7:0], mks1[7:0];
+  logic sqrtspecial;
 
   // Prepopulate table of mks0
   assign mks2[0] = 12;
@@ -65,20 +67,27 @@ module fdivsqrtuslc4cmp (
   assign mks1[5] = 8; // is the logic any cheaper if this is a 6?
   assign mks1[6] = 8;
   assign mks1[7] = 8;
+
+  // handles special case when j = 0 or j = 1 for sqrt
+  assign mkj2 = 20; // when j = 1 use mk2[101] when j = 0 use anything bigger than 7.
+  assign mkj1 = j1 ? 8 : 0; // when j = 1 use mk1[101] = 8 and when j = 0 use 0 so we choose u_0 = 1
+  assign sqrtspecial = SqrtE & (j1 | j0); // sqrt with j0 or j1 set: mk2/mk1 below take mkj2/mkj1 instead of the mks2/mks1 table entries
 
-  // Choose A for current operation
+  // Choose A for current operation *** Come back to this
   always_comb
     if (SqrtE) begin
-      if (j1) A = 3'b101;
-      else if (Smsbs == 5'b10000) A = 3'b111;
+      //if (j1) A = 3'b101;
+      if (Smsbs == 5'b10000) A = 3'b111; // *** can we get rid of SMSBs case?
       else A = Smsbs[2:0];
     end else A = Dmsbs;
+
 
   // Choose selection constants based on a
-  assign mk2 = mks2[A];
-  assign mk1 = mks1[A];
-  assign mk0 = -mks1[A];
-  assign mkm1 = (A == 3'b000) ? -13 : -mks2[A]; // asymmetry in table
+
+  assign mk2 = sqrtspecial ? mkj2 : mks2[A];
+  assign mk1 = sqrtspecial ? mkj1 : mks1[A];
+  assign mk0 = -mk1;
+  assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table *** can we hide?
 
   // Compare residual W to selection constants to choose digit
   always_comb