Merge pull request #465 from davidharrishmc/dev

fdivsqrt cleanup
This commit is contained in:
Rose Thompson 2023-11-10 22:25:09 -08:00 committed by GitHub
commit 3af8e1ff50
14 changed files with 151 additions and 130 deletions

View File

@ -93,16 +93,20 @@ localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF);
localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2);
localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
// intermediate division parameters not directly used in Divider
localparam FPDIVN = NF+3; // length of floating-point inputs: Ns + 2 = Nf + 3 for 1 integer bit, Nf fracitonal bits, 2 extra bits to shift sqrt into [1/4, 1)]
localparam DIVN = ((FPDIVN<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVN+3; // standard length of input: max(XLEN, NF+2) ***
// division constants
localparam DIVN = (((NF+2<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input
localparam LOGR = ($clog2(RADIX)); // r = log(R)
localparam RK = (LOGR*DIVCOPIES); // r*k used for intdiv preproc
localparam LOGRK = ($clog2(RK)); // log2(r*k)
localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
localparam DURLEN = ($clog2(FPDUR+1));
localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
localparam DIVBLEN = ($clog2(DIVb+1)-1);
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result
// *** define NF+2, justify, use in DIVN
localparam LOGR = $clog2(RADIX); // r = log(R)
localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated
//localparam FPDUR = (DIVN+1)/RK + 1 + (RADIX/4); // *** relate to algorithm for rest of these
localparam FPDUR = (DIVN+LOGR-1)/RK + 1 ; // ceiling((DIVN+LOGR)/RK)
localparam DURLEN = $clog2(FPDUR+1);
localparam DIVb = FPDUR*RK - 1; // canonical fdiv size (b)
localparam DIVBLEN = $clog2(DIVb+2)-1; // *** where is 2 coming from?
// largest length in IEU/FPU
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF)); // max(XLEN, NF)

View File

@ -177,13 +177,10 @@ localparam cvw_t P = '{
NORMSHIFTSZ : NORMSHIFTSZ,
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
CORRSHIFTSZ : CORRSHIFTSZ,
DIVN : DIVN,
LOGR : LOGR,
RK : RK,
LOGRK : LOGRK,
FPDUR : FPDUR,
DURLEN : DURLEN,
DIVb : DIVb,
DIVBLEN : DIVBLEN,
DIVa : DIVa
DIVBLEN : DIVBLEN
};

View File

@ -269,15 +269,12 @@ typedef struct packed {
int CORRSHIFTSZ;
// division constants
int DIVN ;
int LOGR ;
int RK ;
int LOGRK ;
int FPDUR ;
int DURLEN ;
int DIVb ;
int DIVBLEN ;
int DIVa ;
} cvw_t;

View File

@ -45,8 +45,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
input logic IntDivE, W64E,
output logic DivStickyM,
output logic FDivBusyE, IFDivStartE, FDivDoneE,
output logic [P.NE+1:0] QeM,
output logic [P.DIVb:0] QmM,
output logic [P.NE+1:0] UeM, // Exponent result
output logic [P.DIVb:0] UmM, // Significand result
output logic [P.XLEN-1:0] FIntDivResultM
);
@ -67,17 +67,17 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
// Integer div/rem signals
logic BZeroM; // Denominator is zero
logic IntDivM; // Integer operation
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
logic [P.DIVBLEN:0] IntNormShiftM; // Integer normalizatoin shift amount
logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
logic ISpecialCaseE; // Integer div/remainder special cases
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
// Int-specific
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
.BZeroM, .nM, .mM, .AM,
.BZeroM, .IntNormShiftM, .AM,
.IntDivM, .W64M, .ALTBM, .AsM, .BsM);
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
@ -94,8 +94,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
.QmM, .WZeroE, .DivStickyM,
.UmM, .WZeroE, .DivStickyM,
// Int-specific
.nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
.IntNormShiftM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
.FIntDivResultM);
endmodule

View File

@ -30,10 +30,13 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic IntDivE,
input logic [P.DIVBLEN:0] nE,
input logic [P.DIVBLEN:0] IntResultBitsE,
output logic [P.DURLEN-1:0] CyclesE
);
logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
logic [P.DURLEN+1:0] Nf, FPResultBitsE; // number of fractional bits
logic [P.DIVBLEN:0] ResultBitsE; // number of result bits;
// DIVN = P.NF+3
// NS = NF + 1
// N = NS or NS+2 for div/sqrt.
@ -64,12 +67,21 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
P.Q_FMT: Nf = P.Q_NF;
endcase
// Cycle logic
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
// Integer division needs p fractional + r integer result bits
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
// FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
always_comb begin
if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below?
// if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 and +0 rather than +2; is it related to DIVCOPIES logic below?
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
else ResultBitsE = FPResultBitsE;
assign CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk)
end
/* verilator lint_on WIDTH */

View File

@ -32,8 +32,9 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
input logic Sqrt,
input logic XZero,
input logic [P.DIVBLEN:0] ell, m,
output logic [P.NE+1:0] Qe
output logic [P.NE+1:0] Ue
);
logic [P.NE-2:0] Bias;
logic [P.NE+1:0] SXExp;
logic [P.NE+1:0] SExp;
@ -63,10 +64,14 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
endcase
end
// Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
// correct exponent for subnormal input's normalization shifts
// division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
assign Qe = Sqrt ? SExp : DExp;
// Select square root or division exponent
assign Ue = Sqrt ? SExp : DExp;
endmodule

View File

@ -37,15 +37,15 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
input logic Firstun, SqrtM, SpecialCaseM,
input logic [P.XLEN-1:0] AM,
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M,
input logic [P.DIVBLEN:0] nM, mM,
output logic [P.DIVb:0] QmM,
input logic [P.DIVBLEN:0] IntNormShiftM,
output logic [P.DIVb:0] UmM, // result significand
output logic WZeroE,
output logic DivStickyM,
output logic [P.XLEN-1:0] FIntDivResultM
);
logic [P.DIVb+3:0] W, Sum;
logic [P.DIVb:0] PreQmM;
logic [P.DIVb:0] PreUmM;
logic NegStickyM;
logic weq0E, WZeroM;
logic [P.XLEN-1:0] IntDivResultM;
@ -91,17 +91,16 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
assign Sum = WC + WS;
assign NegStickyM = Sum[P.DIVb+3];
mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);
// Integer quotient or remainder correctoin, normalization, and special cases
// Integer quotient or remainder correction, normalization, and special cases
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
logic [P.DIVBLEN:0] NormShiftM;
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
assign W = $signed(Sum) >>> P.LOGR;
assign UnsignedQuotM = {3'b000, PreQmM};
assign UnsignedQuotM = {3'b000, PreUmM};
// Integer remainder: sticky and sign correction muxes
assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
@ -110,9 +109,8 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
// Select quotient or remainder and do normalization shift
mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM);
// special case logic
// terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
@ -120,7 +118,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
if (BZeroM) begin // Divide by zero
if (RemOpM) IntDivResultM = AM;
else IntDivResultM = {(P.XLEN){1'b1}};
end else if (ALTBM) begin // Numerator is zero
end else if (ALTBM) begin // Numerator is small
if (RemOpM) IntDivResultM = AM;
else IntDivResultM = '0;
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];

View File

@ -35,25 +35,26 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
input logic SqrtE,
input logic XZeroE,
input logic [2:0] Funct3E,
output logic [P.NE+1:0] QeM,
output logic [P.NE+1:0] UeM,
output logic [P.DIVb+3:0] X, D,
// Int-specific
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic IntDivE, W64E,
output logic ISpecialCaseE,
output logic [P.DURLEN-1:0] CyclesE,
output logic [P.DIVBLEN:0] nM, mM,
output logic [P.DIVBLEN:0] IntNormShiftM,
output logic ALTBM, IntDivM, W64M,
output logic AsM, BsM, BZeroM,
output logic [P.XLEN-1:0] AM
);
logic [P.DIVb-1:0] Xfract, Dfract;
logic [P.DIVb:0] Xnorm, Dnorm;
logic [P.DIVb:0] PreSqrtX;
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic [P.NE+1:0] UeE; // Result Exponent (FP only)
logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs
logic [P.DIVBLEN:0] IntResultBitsE; // bits in integer result
logic NumerZeroE; // Numerator is zero (X or A)
logic AZeroE, BZeroE; // A or B is Zero for integer division
logic SignedDivE; // signed division
@ -89,12 +90,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
// Select integer or floating point inputs
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
end else begin // Int not supported
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
assign NumerZeroE = XZeroE;
end
@ -103,12 +104,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////////////////
// count leading zeros for Subnorm FP and to normalize integer inputs
lzc #(P.DIVb) lzcX (IFX, ell);
lzc #(P.DIVb) lzcY (IFD, mE);
lzc #(P.DIVb+1) lzcX (IFX, ell);
lzc #(P.DIVb+1) lzcY (IFD, mE);
// Normalization shift: shift off leading one
assign Xfract = (IFX << ell) << 1;
assign Dfract = (IFD << mE) << 1;
// Normalization shift: shift leading one into most significant bit
assign Xnorm = (IFX << ell);
assign Dnorm = (IFD << mE);
//////////////////////////////////////////////////////
// Integer Right Shift to digit boundary
@ -122,26 +123,23 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
// calculate number of fractional bits p
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
/* verilator lint_off WIDTH */
assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
/* verilator lint_on WIDTH */
// Integer special cases (terminate immediately)
assign ISpecialCaseE = BZeroE | ALTBE;
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
if (P.LOGRK > 0) begin // more than 1 bit per cycle
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
/* verilator lint_off WIDTH */
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
// calculate right shift amount RightShiftX to complete in discrete number of steps
if (P.RK > 1) begin // more than 1 bit per cycle
logic [$clog2(P.RK)-1:0] RightShiftX;
/* verilator lint_offf WIDTH */
assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps
/* verilator lint_on WIDTH */
end else begin // radix 2 1 copy doesn't require shifting
assign nE = p;
assign DivXShifted = DivX;
end
end else begin
@ -150,18 +148,25 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////////////////
// Floating-Point Preprocessing
// append leading 1 (for nonzero inputs)
// Extend to Q4.b format
// shift square root to be in range [1/4, 1)
// Normalized numbers are shifted right by 1 if the exponent is odd
// Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
// NOTE: there might be a discrepancy that X is never right shifted by 2. However
// it comes out in the wash and gives the right answer. Investigate later if possible.
// it comes out in the wash and gives the right answer. Investigate later if possible. ***
//////////////////////////////////////////////////////
assign DivX = {3'b000, ~NumerZeroE, Xfract};
assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
// Sqrt is initialized on step one as R(X-1), so depends on Radix
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
// If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
// Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
// Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
// Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraciton bits
// Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
// For Radix 2, this gives 3 leading 1s, followed by the fraction bits
// For Radix 4, this gives 2 leading 1s, followed by the fraction bits (and a zero in the lsb)
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
@ -176,28 +181,37 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
assign X = PreShiftX;
end
// Divisior register
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
// Divisior register
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
// Floating-point exponent
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Ue(UeE));
flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
if (P.IDIV_ON_FPU) begin:intpipelineregs
logic [P.DIVBLEN:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
logic RemOpE;
/* verilator lint_off WIDTH */
assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift
/* verilator lint_on WIDTH */
assign RemOpE = Funct3E[1];
mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
// pipeline registers
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
if (P.XLEN==64)
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
end
endmodule

View File

@ -133,8 +133,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
// divide signals
logic [P.DIVb:0] QmM; // fdivsqrt signifcand
logic [P.NE+1:0] QeM; // fdivsqrt exponent
logic [P.DIVb:0] UmM; // fdivsqrt signifcand
logic [P.NE+1:0] UeM; // fdivsqrt exponent
logic DivStickyM; // fdivsqrt sticky bit
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
@ -242,8 +242,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM, .FIntDivResultM);
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM,
.UmM, .FIntDivResultM);
// compare: fmin/fmax, flt/fle/feq
fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
@ -326,9 +326,9 @@ module fpu import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////////////////////////////////////////////////////
postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivUm(UmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
.ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));

View File

@ -27,8 +27,8 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module divshiftcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb:0] DivQm, // divsqrt significand
input logic [P.NE+1:0] DivQe, // divsqrt exponent
input logic [P.DIVb:0] DivUm, // divsqrt significand
input logic [P.NE+1:0] DivUe, // divsqrt exponent
output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
output logic DivResSubnorm, // is the divsqrt result subnormal
@ -41,23 +41,23 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
// is the result subnormal
// if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]);
assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
// if the result is subnormal
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// 00000000x.xxxxxx... Exp = DivUe
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
// .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivQe+NF+1-1
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe;
// Left shift amount = DivUe+NF+1-1
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe;
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// 00000000x.xxxxxx... Exp = DivUe
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivUe+1
// 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after)
// inital Left shift amount = NF
// shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);
@ -68,5 +68,5 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
// pre-shift the divider result for normalization
assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
endmodule

View File

@ -48,8 +48,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic DivSticky, // divider sticky bit
input logic [P.NE+1:0] DivQe, // divsqrt exponent
input logic [P.DIVb:0] DivQm, // divsqrt significand
input logic [P.NE+1:0] DivUe, // divsqrt exponent
input logic [P.DIVb:0] DivUm, // divsqrt significand
// conversion signals
input logic CvtCs, // the result's sign
input logic [P.NE:0] CvtCe, // the calculated expoent
@ -91,7 +91,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// division singals
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift
logic DivByZero; // divide by zero flag
logic DivResSubnorm; // is the divsqrt result subnormal
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
@ -146,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
// select which unit's output to shift
always_comb
@ -174,7 +174,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// correct for LZA/divsqrt error
shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivUe, .Ue, .FmaSZero, .Shifted, .FmaMe, .Mf);
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -189,7 +189,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// calulate result sign used in rounding unit
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Ue,
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);

View File

@ -39,7 +39,7 @@ module round import cvw::*; #(parameter cvw_t P) (
// divsqrt
input logic DivOp, // is a division opperation being done
input logic DivSticky, // divsqrt sticky bit
input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent
input logic [P.NE+1:0] Ue, // the divsqrt calculated expoent
// cvt
input logic CvtOp, // is a convert opperation being done
input logic ToInt, // is the cvt op a cvt to integer
@ -300,8 +300,8 @@ module round import cvw::*; #(parameter cvw_t P) (
case(PostProcSel)
2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
// 2'b01: Me = DivDone ? Qe : '0; // divide
2'b01: Me = Qe; // divide
// 2'b01: Me = DivDone ? Ue : '0; // divide
2'b01: Me = Ue; // divide
default: Me = '0;
endcase

View File

@ -31,7 +31,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// divsqrt
input logic DivOp, // is it a divsqrt opperation
input logic DivResSubnorm, // is the divsqrt result subnormal
input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent
input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
//fma
input logic FmaOp, // is it an fma opperation
@ -41,7 +41,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// output
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [P.NE+1:0] Qe // corrected exponent for divider
output logic [P.NE+1:0] Ue // corrected exponent for divider
);
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
@ -61,7 +61,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// correct the shifting of the divsqrt caused by producing a result in (2, .5] range
// condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm)
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
@ -87,5 +87,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// the quotent is in the range [.5,2) if there is no early termination
// if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
endmodule

View File

@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
always_comb
if (BadNaNBox) begin
// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
end else
PostBox = In;
@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
if (BadNaNBox) begin
case (Fmt)
P.FMT: PostBox = In;
// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]};
P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
default: PostBox = 'x;
@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
if (BadNaNBox) begin
case (Fmt)
2'b11: PostBox = In;
// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]};
// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]};
// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]};
2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};