From 175c18da017dacf91b7d0bed5d7fe370d5f1cf46 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 13 May 2024 15:16:00 -0700 Subject: [PATCH] Parameterized FMA. However, some offsets are not parameterized. See PR #793 for list of changes --- config/shared/config-shared.vh | 6 +- config/shared/parameter-defs.vh | 1 - src/cvw.sv | 1 - src/fpu/fma/fmaalign.sv | 10 +-- src/fpu/fpu.sv | 8 +- src/fpu/postproc/fmashiftcalc.sv | 16 ++-- src/fpu/postproc/postprocess.sv | 16 ++-- src/fpu/postproc/round.sv | 134 ++++++++++++++-------------- src/fpu/postproc/shiftcorrection.sv | 8 +- testbench/testbench_fp.sv | 4 +- 10 files changed, 100 insertions(+), 104 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index ae511e8c7..02d60c4f4 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -128,12 +128,10 @@ localparam FMALEN = 3*NF + 6; // because NORMSHIFTSZ becomes limited by convert rather than divider // The two extra bits are necessary because shiftcorrection dropped them for fcvt. // May be possible to remove these two bits by modifying shiftcorrection -localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2)); -//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2)); +//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2)); +localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2)); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) -localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits - // Disable spurious Verilator warnings diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index 026794b4b..1b99a9175 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -196,7 +196,6 @@ localparam cvw_t P = '{ FMALEN : FMALEN, NORMSHIFTSZ : NORMSHIFTSZ, LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, - CORRSHIFTSZ : CORRSHIFTSZ, LOGR : LOGR, RK : RK, FPDUR : FPDUR, diff --git a/src/cvw.sv b/src/cvw.sv index 0a4cf1549..b43772b41 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -288,7 +288,6 @@ typedef struct packed { int NORMSHIFTSZ; int LOGNORMSHIFTSZ; int FMALEN; - int CORRSHIFTSZ; // division constants int LOGR ; diff --git a/src/fpu/fma/fmaalign.sv b/src/fpu/fma/fmaalign.sv index 4fc796fda..c6f0afebc 100644 --- a/src/fpu/fma/fmaalign.sv +++ b/src/fpu/fma/fmaalign.sv @@ -31,14 +31,14 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [P.NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero - output logic [3*P.NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic [P.FMALEN-1:0] Am, // addend aligned for addition in U(NF+5.2NF+1) output logic ASticky, // Sticky bit calculated from the aliged addend output logic KillProd // should the product be set to zero ); logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*P.NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*P.NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [P.FMALEN+P.NF-1:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [P.FMALEN+P.NF-1:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; // should the addend be killed /////////////////////////////////////////////////////////////////////////////// @@ -56,7 +56,7 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - assign ZmPreshifted = {Zm,(3*P.NF+5)'(0)}; + assign ZmPreshifted = {Zm,(P.FMALEN-1)'(0)}; assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero; assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5)); @@ -87,6 +87,6 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( end end - assign Am = ZmShifted[4*P.NF+5:P.NF]; + assign Am = ZmShifted[P.FMALEN+P.NF-1:P.NF]; endmodule diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 8f4297ec0..4cf17890c 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -119,14 +119,14 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // Fma Signals logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying - logic [3*P.NF+5:0] SmE, SmM; // Sum significand + logic [P.FMALEN-1:0] SmE, SmM; // Sum significand logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output logic [P.NE+1:0] SeE,SeM; // Sum exponent logic InvAE, InvAM; // Invert addend logic AsE, AsM; // Addend sign logic PsE, PsM; // Product sign logic SsE, SsM; // Sum sign - logic [$clog2(3*P.NF+7)-1:0] SCntE, SCntM; // LZA sum leading zero count + logic [$clog2(P.FMALEN+1)-1:0] SCntE, SCntM; // LZA sum leading zero count // Cvt Signals logic [P.NE:0] CeE, CeM; // convert intermediate expoent @@ -358,8 +358,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM}); - flopenrc #(3*P.NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); - flopenrc #($clog2(3*P.NF+7)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, + flopenrc #(P.FMALEN) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #($clog2(P.FMALEN+1)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index 5b0f1175b..27f39e2a5 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -30,13 +30,13 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [P.NE+1:0] FmaSe, // sum's exponent - input logic [3*P.NF+5:0] FmaSm, // the positive sum - input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // normalization shift count + input logic [P.FMALEN-1:0] FmaSm, // the positive sum + input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // normalization shift count output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results - output logic FmaSZero, // is the result subnormal - calculated before LZA corection + output logic FmaSZero, // is the sum zero output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection - output logic [$clog2(3*P.NF+7)-1:0] FmaShiftAmt, // normalization shift count - output logic [3*P.NF+7:0] FmaShiftIn // is the sum zero + output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt, // normalization shift count + output logic [P.FMALEN+1:0] FmaShiftIn ); logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias logic [P.NE+1:0] BiasCorr; // correction for bias @@ -49,7 +49,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent FmaSe-FmaSCnt+NF+2 - assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+7)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4); + assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(P.FMALEN+1)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4); //convert the sum's exponent into the proper precision if (P.FPSIZES == 1) begin @@ -131,6 +131,6 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // set and calculate the shift input and amount // - shift once if killing a product and the result is subnormal assign FmaShiftIn = {2'b0, FmaSm}; - if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3): FmaSCnt+1; - else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1; + if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1; + else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index aa181c5e0..4e893a82e 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -44,9 +44,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic FmaPs, // the product's sign input logic FmaSs, // Sum sign input logic [P.NE+1:0] FmaSe, // the sum's exponent - input logic [3*P.NF+5:0] FmaSm, // the positive sum + input logic [P.FMALEN-1:0] FmaSm, // the positive sum input logic FmaASticky, // sticky bit that is calculated during alignment - input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // the normalization shift count + input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivSticky, // divider sticky bit input logic [P.NE+1:0] DivUe, // divsqrt exponent @@ -70,8 +70,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( logic Rs; // result sign logic [P.NF-1:0] Rf; // Result fraction logic [P.NE-1:0] Re; // Result exponent - logic Ms; // norMalized sign - logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction + logic Ms; // normalized sign + logic [P.NORMSHIFTSZ-1:0] Mf; // normalized fraction logic [P.NE+1:0] Me; // normalized exponent logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow logic UfPlus1; // do you add one (for determining underflow flag) @@ -86,10 +86,10 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // fma signals logic [P.NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero - logic [3*P.NF+7:0] FmaShiftIn; // fma shift input + logic [P.FMALEN+1:0] FmaShiftIn; // fma shift input logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection - logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma + logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt; // normalization shift amount for fma // division signals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input @@ -154,8 +154,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( always_comb case(PostProcSel) 2'b10: begin // fma - ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt}; - ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+8){1'b0}}}; + ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.FMALEN-1){1'b0}}, FmaShiftAmt}; + ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}}; end 2'b00: begin // cvt ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt}; diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index 15c9b4538..c99d5185c 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -32,7 +32,7 @@ module round import cvw::*; #(parameter cvw_t P) ( input logic [2:0] Frm, // rounding mode input logic [1:0] PostProcSel, // select the postprocessor output input logic Ms, // normalized sign - input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction + input logic [P.NORMSHIFTSZ-1:0] Mf, // normalized fraction // fma input logic FmaOp, // is an fma operation being done? input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma @@ -123,61 +123,61 @@ module round import cvw::*; #(parameter cvw_t P) ( // | NF |1|1| // ^ ^ if floating point result // ^ if not an FMA result - if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); + if (XLENPOS == 1)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN - if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 2)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&IntRes) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); end else if (P.FPSIZES == 2) begin // XLEN is either 64 or 32 // so half and single are always smaller then XLEN // 1: XLEN > NF > NF1 - if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); + if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~OutFmt) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&IntRes) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); end else if (P.FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); + if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); end else if (P.FPSIZES == 4) begin // Quad precision will always be greater than XLEN // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | - (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | - (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | + (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | + (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]); // 3: NF > NF1 > XLEN // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | - (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | + (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]); end @@ -188,32 +188,32 @@ module round import cvw::*; #(parameter cvw_t P) ( // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (P.FPSIZES == 1) begin - assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; - assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; - assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; + assign FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1]; + assign FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF]; + assign FpRound = Mf[P.NORMSHIFTSZ-P.NF-2]; end else if (P.FPSIZES == 2) begin - assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1]; - assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1]; - assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2]; + assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-1] : Mf[P.NORMSHIFTSZ-P.NF1-1]; + assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF] : Mf[P.NORMSHIFTSZ-P.NF1]; + assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-2] : Mf[P.NORMSHIFTSZ-P.NF1-2]; end else if (P.FPSIZES == 3) begin always_comb case (OutFmt) P.FMT: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.NF-2]; end P.FMT1: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1]; - FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.NF1-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF1]; + FpRound = Mf[P.NORMSHIFTSZ-P.NF1-2]; end P.FMT2: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2]; - FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.NF2-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF2]; + FpRound = Mf[P.NORMSHIFTSZ-P.NF2-2]; end default: begin FpGuard = 1'bx; @@ -225,31 +225,31 @@ module round import cvw::*; #(parameter cvw_t P) ( always_comb case (OutFmt) 2'h3: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.Q_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.Q_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.Q_NF-2]; end 2'h1: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.D_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.D_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.D_NF-2]; end 2'h0: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.S_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.S_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.S_NF-2]; end 2'h2: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.H_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.H_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.H_NF-2]; end endcase end - assign Guard = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard; - assign LsbRes = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes; - assign Round = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound; + assign Guard = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-1] : FpGuard; + assign LsbRes = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN] : FpLsbRes; + assign Round = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-2] : FpRound; always_comb begin // Determine if you add 1 @@ -296,7 +296,7 @@ module round import cvw::*; #(parameter cvw_t P) ( assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)}; // trim unneeded bits from fraction - assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF]; + assign RoundFrac = Mf[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.NF]; // select the exponent always_comb diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index 85e96c744..ad811a747 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -41,11 +41,11 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( input logic FmaSZero, // output output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum - output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum after correction + output logic [P.NORMSHIFTSZ-1:0] Mf, // the shifted sum after correction output logic [P.NE+1:0] Ue // corrected exponent for divider ); - logic [P.CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction + logic [P.NORMSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction logic ResSubnorm; // is the result Subnormal logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction logic LeftShiftQm; // should the divsqrt result be shifted one to the left @@ -69,12 +69,12 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( assign RightShift = FmaOp ? LZAPlus1 : LeftShiftQm; // one bit right shift for FMA or division - mux2 #(P.NORMSHIFTSZ-2) corrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], RightShift, CorrShifted); + mux2 #(P.NORMSHIFTSZ) corrmux({Shifted[P.NORMSHIFTSZ-3:0], 2'b00}, {Shifted[P.NORMSHIFTSZ-2:1], 2'b00}, RightShift, CorrShifted); // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits always_comb if (FmaOp | (DivOp & ~DivResSubnorm)) Mf = CorrShifted; - else Mf = Shifted[P.NORMSHIFTSZ-1:2]; + else Mf = Shifted[P.NORMSHIFTSZ-1:0]; // Determine sum's exponent // main exponent issues: diff --git a/testbench/testbench_fp.sv b/testbench/testbench_fp.sv index 75be5ca1e..f800a9fed 100644 --- a/testbench/testbench_fp.sv +++ b/testbench/testbench_fp.sv @@ -98,8 +98,8 @@ module testbench_fp; logic [P.NE+1:0] Se; logic ASticky; logic KillProd; - logic [$clog2(3*P.NF+7)-1:0] SCnt; - logic [3*P.NF+5:0] Sm; + logic [$clog2(P.FMALEN+1)-1:0] SCnt; + logic [P.FMALEN-1:0] Sm; logic InvA; logic NegSum; logic As;