mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Divider cleanup
This commit is contained in:
parent
f437336540
commit
571c7d3be4
@ -98,8 +98,8 @@ localparam LOGR = $clog2(RADIX); // r = log(R
|
|||||||
localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated
|
localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated
|
||||||
|
|
||||||
// intermediate division parameters not directly used in fdivsqrt hardware
|
// intermediate division parameters not directly used in fdivsqrt hardware
|
||||||
localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit because square root could be shifted right *** explain better
|
localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right
|
||||||
//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right
|
//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step.
|
||||||
localparam DIVMINb = ((FPDIVMINb<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVMINb; // minimum fractional bits b = max(XLEN, FPDIVMINb)
|
localparam DIVMINb = ((FPDIVMINb<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVMINb; // minimum fractional bits b = max(XLEN, FPDIVMINb)
|
||||||
localparam RESBITS = DIVMINb + LOGR; // number of bits in a result: r integer + b fractional
|
localparam RESBITS = DIVMINb + LOGR; // number of bits in a result: r integer + b fractional
|
||||||
|
|
||||||
|
@ -66,12 +66,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
|||||||
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
|
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
|
||||||
// Integer division needs p fractional + r integer result bits
|
// Integer division needs p fractional + r integer result bits
|
||||||
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
|
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
|
||||||
// FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits
|
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
|
||||||
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
|
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
|
||||||
|
|
||||||
always_comb begin
|
always_comb begin
|
||||||
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit
|
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1
|
||||||
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits
|
||||||
|
|
||||||
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
|
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
|
||||||
else ResultBitsE = FPResultBitsE;
|
else ResultBitsE = FPResultBitsE;
|
||||||
|
@ -168,14 +168,20 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
// This also means only one extra fractional bit is needed because we never shift right by more than 1.
|
// This also means only one extra fractional bit is needed because we never shift right by more than 1.
|
||||||
// Radix Exponent odd Exponent Even
|
// Radix Exponent odd Exponent Even
|
||||||
// 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1)
|
// 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1)
|
||||||
// 4 2x-4 = 4(x/2 - 1)) x-4 = 4(x/4 - 1)
|
// 4 2(x)-4 = 4(x/2 - 1) 2(x/2)-4 = 4(x/4 - 1)
|
||||||
// Summary: PreSqrtX = r(x/2or4 - 1)
|
// Summary: PreSqrtX = r(x/2or4 - 1)
|
||||||
|
|
||||||
|
logic [P.DIVb:0] PreSqrtX;
|
||||||
assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
|
assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
|
||||||
/* mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||||
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1) */
|
else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
// Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
|
||||||
|
// This saves one bit in DIVb because there is no initial right shift.
|
||||||
|
// However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
|
||||||
|
// That is an optimization for another day.
|
||||||
if (P.RADIX == 2) begin
|
if (P.RADIX == 2) begin
|
||||||
logic [P.DIVb:0] PreSqrtX; // U1.DIVb
|
logic [P.DIVb:0] PreSqrtX; // U1.DIVb
|
||||||
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||||
@ -185,6 +191,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
|
mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
|
||||||
assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
|
assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
|
||||||
end
|
end
|
||||||
|
*/
|
||||||
|
|
||||||
// Initialize X for division or square root
|
// Initialize X for division or square root
|
||||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||||
|
@ -103,7 +103,7 @@ module fdivsqrtuslc4 (
|
|||||||
if (Sqrt) begin
|
if (Sqrt) begin
|
||||||
if (j1) A = 3'b101; // on first sqrt iteration A = .101
|
if (j1) A = 3'b101; // on first sqrt iteration A = .101
|
||||||
else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111
|
else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111
|
||||||
else A = Smsbs[2:0]; // otherwise use A = S (in U0.3 format)
|
else A = Smsbs[2:0]; // otherwise use A = 2S (in U0.3 format)
|
||||||
end else A = Dmsbs; // for division, A = D (in U0.3 format, dropping leading 1)
|
end else A = Dmsbs; // for division, A = D (in U0.3 format, dropping leading 1)
|
||||||
|
|
||||||
// Select quotient digit from lookup table based on A and W
|
// Select quotient digit from lookup table based on A and W
|
||||||
|
Loading…
Reference in New Issue
Block a user