Parameterized FMA. However, some offsets are not parameterized. See PR #793 for list of changes

This commit is contained in:
David Harris 2024-05-13 15:16:00 -07:00
parent 2dfada0687
commit 175c18da01
10 changed files with 100 additions and 104 deletions

View File

@ -128,12 +128,10 @@ localparam FMALEN = 3*NF + 6;
// because NORMSHIFTSZ becomes limited by convert rather than divider
// The two extra bits are necessary because shiftcorrection dropped them for fcvt.
// May be possible to remove these two bits by modifying shiftcorrection
localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2));
//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2));
//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2));
localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2));
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ)
localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits
// Disable spurious Verilator warnings

View File

@ -196,7 +196,6 @@ localparam cvw_t P = '{
FMALEN : FMALEN,
NORMSHIFTSZ : NORMSHIFTSZ,
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
CORRSHIFTSZ : CORRSHIFTSZ,
LOGR : LOGR,
RK : RK,
FPDUR : FPDUR,

View File

@ -288,7 +288,6 @@ typedef struct packed {
int NORMSHIFTSZ;
int LOGNORMSHIFTSZ;
int FMALEN;
int CORRSHIFTSZ;
// division constants
int LOGR ;

View File

@ -31,14 +31,14 @@ module fmaalign import cvw::*; #(parameter cvw_t P) (
input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [P.NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero, // is the input zero
output logic [3*P.NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic [P.FMALEN-1:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ASticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero
);
logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*P.NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*P.NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic [P.FMALEN+P.NF-1:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [P.FMALEN+P.NF-1:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ; // should the addend be killed
///////////////////////////////////////////////////////////////////////////////
@ -56,7 +56,7 @@ module fmaalign import cvw::*; #(parameter cvw_t P) (
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
assign ZmPreshifted = {Zm,(3*P.NF+5)'(0)};
assign ZmPreshifted = {Zm,(P.FMALEN-1)'(0)};
assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5));
@ -87,6 +87,6 @@ module fmaalign import cvw::*; #(parameter cvw_t P) (
end
end
assign Am = ZmShifted[4*P.NF+5:P.NF];
assign Am = ZmShifted[P.FMALEN+P.NF-1:P.NF];
endmodule

View File

@ -119,14 +119,14 @@ module fpu import cvw::*; #(parameter cvw_t P) (
// Fma Signals
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
logic [3*P.NF+5:0] SmE, SmM; // Sum significand
logic [P.FMALEN-1:0] SmE, SmM; // Sum significand
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
logic [P.NE+1:0] SeE,SeM; // Sum exponent
logic InvAE, InvAM; // Invert addend
logic AsE, AsM; // Addend sign
logic PsE, PsM; // Product sign
logic SsE, SsM; // Sum sign
logic [$clog2(3*P.NF+7)-1:0] SCntE, SCntM; // LZA sum leading zero count
logic [$clog2(P.FMALEN+1)-1:0] SCntE, SCntM; // LZA sum leading zero count
// Cvt Signals
logic [P.NE:0] CeE, CeM; // convert intermediate expoent
@ -358,8 +358,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM});
flopenrc #(3*P.NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(3*P.NF+7)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
flopenrc #(P.FMALEN) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(P.FMALEN+1)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
{FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,

View File

@ -30,13 +30,13 @@
module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [P.NE+1:0] FmaSe, // sum's exponent
input logic [3*P.NF+5:0] FmaSm, // the positive sum
input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // normalization shift count
input logic [P.FMALEN-1:0] FmaSm, // the positive sum
input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // normalization shift count
output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
output logic FmaSZero, // is the result subnormal - calculated before LZA corection
output logic FmaSZero, // is the sum zero
output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
output logic [$clog2(3*P.NF+7)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*P.NF+7:0] FmaShiftIn // is the sum zero
output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt, // normalization shift count
output logic [P.FMALEN+1:0] FmaShiftIn
);
logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias
logic [P.NE+1:0] BiasCorr; // correction for bias
@ -49,7 +49,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent FmaSe-FmaSCnt+NF+2
assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+7)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4);
assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(P.FMALEN+1)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4);
//convert the sum's exponent into the proper precision
if (P.FPSIZES == 1) begin
@ -131,6 +131,6 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) (
// set and calculate the shift input and amount
// - shift once if killing a product and the result is subnormal
assign FmaShiftIn = {2'b0, FmaSm};
if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3): FmaSCnt+1;
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1;
if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1;
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1;
endmodule

View File

@ -44,9 +44,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
input logic FmaPs, // the product's sign
input logic FmaSs, // Sum sign
input logic [P.NE+1:0] FmaSe, // the sum's exponent
input logic [3*P.NF+5:0] FmaSm, // the positive sum
input logic [P.FMALEN-1:0] FmaSm, // the positive sum
input logic FmaASticky, // sticky bit that is calculated during alignment
input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // the normalization shift count
input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic DivSticky, // divider sticky bit
input logic [P.NE+1:0] DivUe, // divsqrt exponent
@ -70,8 +70,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
logic Rs; // result sign
logic [P.NF-1:0] Rf; // Result fraction
logic [P.NE-1:0] Re; // Result exponent
logic Ms; // norMalized sign
logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction
logic Ms; // normalized sign
logic [P.NORMSHIFTSZ-1:0] Mf; // normalized fraction
logic [P.NE+1:0] Me; // normalized exponent
logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic UfPlus1; // do you add one (for determining underflow flag)
@ -86,10 +86,10 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// fma signals
logic [P.NE+1:0] FmaMe; // exponent of the normalized sum
logic FmaSZero; // is the sum zero
logic [3*P.NF+7:0] FmaShiftIn; // fma shift input
logic [P.FMALEN+1:0] FmaShiftIn; // fma shift input
logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma
logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt; // normalization shift amount for fma
// division signals
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
@ -154,8 +154,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
always_comb
case(PostProcSel)
2'b10: begin // fma
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+8){1'b0}}};
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.FMALEN-1){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};

View File

@ -32,7 +32,7 @@ module round import cvw::*; #(parameter cvw_t P) (
input logic [2:0] Frm, // rounding mode
input logic [1:0] PostProcSel, // select the postprocessor output
input logic Ms, // normalized sign
input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction
input logic [P.NORMSHIFTSZ-1:0] Mf, // normalized fraction
// fma
input logic FmaOp, // is an fma operation being done?
input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma
@ -123,61 +123,61 @@ module round import cvw::*; #(parameter cvw_t P) (
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
if (XLENPOS == 1)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN
if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
if (XLENPOS == 2)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&IntRes) |
(|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
(|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
(|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&IntRes) |
(|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
(|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
// 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
(|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
// 3: NF > NF1 > XLEN
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) |
(|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
(|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
end else if (P.FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
(|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
(|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
(|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
(|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]);
end
@ -188,32 +188,32 @@ module round import cvw::*; #(parameter cvw_t P) (
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
if (P.FPSIZES == 1) begin
assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
assign FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1];
assign FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF];
assign FpRound = Mf[P.NORMSHIFTSZ-P.NF-2];
end else if (P.FPSIZES == 2) begin
assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-1] : Mf[P.NORMSHIFTSZ-P.NF1-1];
assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF] : Mf[P.NORMSHIFTSZ-P.NF1];
assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-2] : Mf[P.NORMSHIFTSZ-P.NF1-2];
end else if (P.FPSIZES == 3) begin
always_comb
case (OutFmt)
P.FMT: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1];
FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF];
FpRound = Mf[P.NORMSHIFTSZ-P.NF-2];
end
P.FMT1: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2];
FpGuard = Mf[P.NORMSHIFTSZ-P.NF1-1];
FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF1];
FpRound = Mf[P.NORMSHIFTSZ-P.NF1-2];
end
P.FMT2: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2];
FpGuard = Mf[P.NORMSHIFTSZ-P.NF2-1];
FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF2];
FpRound = Mf[P.NORMSHIFTSZ-P.NF2-2];
end
default: begin
FpGuard = 1'bx;
@ -225,31 +225,31 @@ module round import cvw::*; #(parameter cvw_t P) (
always_comb
case (OutFmt)
2'h3: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
FpGuard = Mf[P.NORMSHIFTSZ-P.Q_NF-1];
FpLsbRes = Mf[P.NORMSHIFTSZ-P.Q_NF];
FpRound = Mf[P.NORMSHIFTSZ-P.Q_NF-2];
end
2'h1: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2];
FpGuard = Mf[P.NORMSHIFTSZ-P.D_NF-1];
FpLsbRes = Mf[P.NORMSHIFTSZ-P.D_NF];
FpRound = Mf[P.NORMSHIFTSZ-P.D_NF-2];
end
2'h0: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2];
FpGuard = Mf[P.NORMSHIFTSZ-P.S_NF-1];
FpLsbRes = Mf[P.NORMSHIFTSZ-P.S_NF];
FpRound = Mf[P.NORMSHIFTSZ-P.S_NF-2];
end
2'h2: begin
FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1];
FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2];
FpGuard = Mf[P.NORMSHIFTSZ-P.H_NF-1];
FpLsbRes = Mf[P.NORMSHIFTSZ-P.H_NF];
FpRound = Mf[P.NORMSHIFTSZ-P.H_NF-2];
end
endcase
end
assign Guard = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard;
assign LsbRes = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes;
assign Round = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound;
assign Guard = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-1] : FpGuard;
assign LsbRes = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN] : FpLsbRes;
assign Round = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-2] : FpRound;
always_comb begin
// Determine if you add 1
@ -296,7 +296,7 @@ module round import cvw::*; #(parameter cvw_t P) (
assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
// trim unneeded bits from fraction
assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
assign RoundFrac = Mf[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.NF];
// select the exponent
always_comb

View File

@ -41,11 +41,11 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
input logic FmaSZero,
// output
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum after correction
output logic [P.NORMSHIFTSZ-1:0] Mf, // the shifted sum after correction
output logic [P.NE+1:0] Ue // corrected exponent for divider
);
logic [P.CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction
logic [P.NORMSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction
logic ResSubnorm; // is the result Subnormal
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
logic LeftShiftQm; // should the divsqrt result be shifted one to the left
@ -69,12 +69,12 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
assign RightShift = FmaOp ? LZAPlus1 : LeftShiftQm;
// one bit right shift for FMA or division
mux2 #(P.NORMSHIFTSZ-2) corrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], RightShift, CorrShifted);
mux2 #(P.NORMSHIFTSZ) corrmux({Shifted[P.NORMSHIFTSZ-3:0], 2'b00}, {Shifted[P.NORMSHIFTSZ-2:1], 2'b00}, RightShift, CorrShifted);
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
always_comb
if (FmaOp | (DivOp & ~DivResSubnorm)) Mf = CorrShifted;
else Mf = Shifted[P.NORMSHIFTSZ-1:2];
else Mf = Shifted[P.NORMSHIFTSZ-1:0];
// Determine sum's exponent
// main exponent issues:

View File

@ -98,8 +98,8 @@ module testbench_fp;
logic [P.NE+1:0] Se;
logic ASticky;
logic KillProd;
logic [$clog2(3*P.NF+7)-1:0] SCnt;
logic [3*P.NF+5:0] Sm;
logic [$clog2(P.FMALEN+1)-1:0] SCnt;
logic [P.FMALEN-1:0] Sm;
logic InvA;
logic NegSum;
logic As;