diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 14de5187e..55bca569f 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -98,7 +98,8 @@ localparam LOGR = $clog2(RADIX); // r = log(R localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated // intermediate division parameters not directly used in fdivsqrt hardware -localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit because square root could be shifted right *** explain better +localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right +//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step. localparam DIVMINb = ((FPDIVMINb -1 to choose 0, -1, 1 respectively - // The quotient selection logic is presented for simplicity, not - // for efficiency. You can probably optimize your logic to - // select the proper divisor with less delay. 
+ //if p2 * p1 * p0, W = -1 and choose digit of 0 + assign uz = ((WS[2]^WC[2]) & (WS[1]^WC[1]) & + (WS[0]^WC[0])); - // Quotient equations from EE371 lecture notes 13-20 - assign p = ps ^ pc; - assign g = ps & pc; - - assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) & - (ps[0]^pc[0])); - assign sign = (ps[3]^pc[3])^ - (ps[2] & pc[2] | ((ps[2]^pc[2]) & - (ps[1]&pc[1] | ((ps[1]^pc[1]) & - (ps[0]&pc[0]))))); + // Otherwise determine sign using carry chain: sign = p3 ^ g_2:0 + assign sign = (WS[3]^WC[3])^ + (WS[2] & WC[2] | ((WS[2]^WC[2]) & + (WS[1]&WC[1] | ((WS[1]^WC[1]) & + (WS[0]&WC[0]))))); // Produce digit = +1, 0, or -1 - assign up = magnitude & ~sign; - assign uz = ~magnitude; - assign un = magnitude & sign; + assign up = ~uz & ~sign; + assign un = ~uz & sign; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4.sv similarity index 72% rename from src/fpu/fdivsqrt/fdivsqrtqsel4.sv rename to src/fpu/fdivsqrt/fdivsqrtuslc4.sv index de520bef2..b44b34a35 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrtqsel4.sv +// fdivsqrtuslc4.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu // Modified:13 January 2022 // -// Purpose: Radix 4 Quotient Digit Selection +// Purpose: Table-based Radix 4 Unified Quotient/Square Root Digit Selection // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -26,25 +26,25 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module fdivsqrtqsel4 ( - input logic [2:0] Dmsbs, - input logic [4:0] Smsbs, - input logic [7:0] WSmsbs, WCmsbs, +module fdivsqrtuslc4 ( + input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 + input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation + input logic [7:0] WSmsbs, WCmsbs, // Q4.4 redundant residual most significant bits input logic Sqrt, j1, - output logic [3:0] udigit + output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] A; + logic [7:0] PreWmsbs; // Q4.4 nonredundant residual msbs + logic [6:0] Wmsbs; // Q4.3 truncated nonredundant residual + logic [2:0] A; // U0.3 upper bits of D or Smsbs, discarding integer bit - assign PreWmsbs = WCmsbs + WSmsbs; - assign Wmsbs = PreWmsbs[7:1]; + assign PreWmsbs = WCmsbs + WSmsbs; // add redundant residual to find msbs + assign Wmsbs = PreWmsbs[7:1]; // truncate least significant bit to Q4.3 to index table // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... 
// Wmsbs = | | - logic [3:0] USel4[1023:0]; + logic [3:0] USel4[1023:0]; // 1024-entry table of 4-bit digits, indexed with 3 bits of A and 7 bits of Wmsbs // Prepopulate selection table; this is constant at compile time always_comb begin @@ -101,10 +101,10 @@ module fdivsqrtqsel4 ( // Select A always_comb if (Sqrt) begin - if (j1) A = 3'b101; - else if (Smsbs == 5'b10000) A = 3'b111; - else A = Smsbs[2:0]; - end else A = Dmsbs; + if (j1) A = 3'b101; // on first sqrt iteration A = .101 + else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111 + else A = Smsbs[2:0]; // otherwise use A = 2S (in U0.3 format) + end else A = Dmsbs; // for division, A = D (in U0.3 format, dropping leading 1) // Select quotient digit from lookup table based on A and W assign udigit = USel4[{A,Wmsbs}]; diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv similarity index 90% rename from src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv rename to src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv index fe436413e..ccb5e618a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrtqsel4cmp.sv +// fdivsqrtuslc4cmp.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu // Modified:13 January 2022 // -// Purpose: Comparator-based Radix 4 Quotient Digit Selection +// Purpose: Comparator-based Radix 4 Unified Quotient/Square Root Digit Selection // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -26,12 +26,12 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module fdivsqrtqsel4cmp ( +module fdivsqrtuslc4cmp ( input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation - input logic [7:0] WSmsbs, WCmsbs, // Q4.4 + input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits input logic SqrtE, j1, - output logic [3:0] udigit + output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); logic [6:0] Wmsbs; logic [7:0] PreWmsbs;