From c2b9e326caed3326ac0e8444ebdf9deb9ae8b148 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 13 May 2024 13:27:29 -0700 Subject: [PATCH 1/5] Fround cleanup --- src/fpu/fround.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fpu/fround.sv b/src/fpu/fround.sv index fb4911253..085e25771 100644 --- a/src/fpu/fround.sv +++ b/src/fpu/fround.sv @@ -51,7 +51,7 @@ module fround import cvw::*; #(parameter cvw_t P) ( // Unbiased exponent assign E = Xe - P.BIAS[P.NE-1:0]; - assign Xep1 = Xe + 1; + assign Xep1 = Xe + 1'b1; ////////////////////////////////////////// // Compute LSB L', rounding bit R' and Sticky bit T' @@ -85,7 +85,7 @@ module fround import cvw::*; #(parameter cvw_t P) ( assign Lnonneg = |(Xm & HotE); assign Rnonneg = |(Xm & HotEP1); assign Trunc = Xm & IMask; - assign {Two, Rnd} = Trunc + HotE; // Two means result is 10.000000 = 2.0 + assign {Two, Rnd} = Trunc + HotE; // Two means result overflowed to 10.000000 = 2.0 // mux and AND-OR logic to select final rounding bits mux2 #(1) Lmux(Lnonneg, 1'b0, Elt0, Lp); From 2dfada06871644dbb0209526c5eebba5dd2604e9 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 13 May 2024 14:01:36 -0700 Subject: [PATCH 2/5] Started parameterizing FMA --- config/shared/config-shared.vh | 7 +++++-- config/shared/parameter-defs.vh | 1 + src/cvw.sv | 1 + src/fpu/fma/fma.sv | 10 +++++----- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 2401cada2..ae511e8c7 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -115,6 +115,9 @@ localparam CVTLEN = (ZFA_SUPPORTED & D_SUPPORTED) ? `max(BASECVTLEN, 32'd84) : B localparam LLEN = `max($unsigned(FLEN), $unsigned(XLEN)); localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); +// size of FMA output +localparam FMALEN = 3*NF + 6; + // NORMSHIFTSIZE is the bits out of the normalization shifter // RV32F: max(32+23+1, 2(23)+4, 3(23)+6) = 3*23+6 = 75 // RV64F: max(64+23+1, 64 + 23 + 2, 3*23+6) = 89 @@ -125,8 +128,8 @@ localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); // because NORMSHIFTSZ becomes limited by convert rather than divider // The two extra bits are necessary because shiftcorrection dropped them for fcvt. 
// May be possible to remove these two bits by modifying shiftcorrection -localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+8)); -//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+8)); +localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2)); +//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2)); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index 96440490c..026794b4b 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -193,6 +193,7 @@ localparam cvw_t P = '{ CVTLEN : CVTLEN, LLEN : LLEN, LOGCVTLEN : LOGCVTLEN, + FMALEN : FMALEN, NORMSHIFTSZ : NORMSHIFTSZ, LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, CORRSHIFTSZ : CORRSHIFTSZ, diff --git a/src/cvw.sv b/src/cvw.sv index 1f8e0a1c1..0a4cf1549 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -287,6 +287,7 @@ typedef struct packed { int LOGCVTLEN; int NORMSHIFTSZ; int LOGNORMSHIFTSZ; + int FMALEN; int CORRSHIFTSZ; // division constants diff --git a/src/fpu/fma/fma.sv b/src/fpu/fma/fma.sv index 3576b95df..8bf4d4cbb 100644 --- a/src/fpu/fma/fma.sv +++ b/src/fpu/fma/fma.sv @@ -34,13 +34,13 @@ module fma import cvw::*; #(parameter cvw_t P) ( input logic XZero, YZero, ZZero, // is the input zero input logic [2:0] OpCtrl, // operation control output logic ASticky, // sticky bit that is calculated during alignment - output logic [3*P.NF+5:0] Sm, // the positive sum's significand + output logic [P.FMALEN-1:0] Sm, // the positive sum's significand output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other operations) output logic Ps, // the product's sign output logic Ss, // the sum's sign output logic [P.NE+1:0] Se, // the sum's exponent - output logic [$clog2(3*P.NF+7)-1:0] SCnt // normalization shift count + output logic [$clog2(P.FMALEN+1)-1:0] SCnt // normalization shift count ); // OpCtrl: @@ -54,8 +54,8 @@ module fma import cvw::*; #(parameter cvw_t P) ( // 111 - sub logic [2*P.NF+1:0] Pm; // the product's significand in U(2.2Nf) format - logic [3*P.NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) - logic [3*P.NF+5:0] AmInv; // aligned addend's mantissa possibly inverted + logic [P.FMALEN-1:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) + logic [P.FMALEN-1:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*P.NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) logic KillProd; // set the product to zero before addition if the product is too small to matter logic [P.NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign @@ -89,6 +89,6 @@ module fma import cvw::*; #(parameter cvw_t P) ( fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); - fmalza #(3*P.NF+6, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); + fmalza #(P.FMALEN, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); endmodule From 175c18da017dacf91b7d0bed5d7fe370d5f1cf46 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 13 May 2024 15:16:00 -0700 Subject: [PATCH 3/5] Parameterized FMA. 
However, some offsets are not parameterized. See PR #793 for list of changes --- config/shared/config-shared.vh | 6 +- config/shared/parameter-defs.vh | 1 - src/cvw.sv | 1 - src/fpu/fma/fmaalign.sv | 10 +-- src/fpu/fpu.sv | 8 +- src/fpu/postproc/fmashiftcalc.sv | 16 ++-- src/fpu/postproc/postprocess.sv | 16 ++-- src/fpu/postproc/round.sv | 134 ++++++++++++++-------------- src/fpu/postproc/shiftcorrection.sv | 8 +- testbench/testbench_fp.sv | 4 +- 10 files changed, 100 insertions(+), 104 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index ae511e8c7..02d60c4f4 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -128,12 +128,10 @@ localparam FMALEN = 3*NF + 6; // because NORMSHIFTSZ becomes limited by convert rather than divider // The two extra bits are necessary because shiftcorrection dropped them for fcvt. // May be possible to remove these two bits by modifying shiftcorrection -localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2)); -//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2)); +//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2)); +localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2)); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) -localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits - // Disable spurious Verilator warnings diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index 026794b4b..1b99a9175 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -196,7 +196,6 @@ localparam cvw_t P = '{ FMALEN : FMALEN, NORMSHIFTSZ : NORMSHIFTSZ, LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, - CORRSHIFTSZ : CORRSHIFTSZ, LOGR : LOGR, RK : RK, FPDUR : FPDUR, diff --git a/src/cvw.sv b/src/cvw.sv index 0a4cf1549..b43772b41 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -288,7 +288,6 @@ typedef struct packed { int NORMSHIFTSZ; int LOGNORMSHIFTSZ; int FMALEN; - int CORRSHIFTSZ; // division constants int LOGR ; diff --git a/src/fpu/fma/fmaalign.sv b/src/fpu/fma/fmaalign.sv index 4fc796fda..c6f0afebc 100644 --- a/src/fpu/fma/fmaalign.sv +++ b/src/fpu/fma/fmaalign.sv @@ -31,14 +31,14 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [P.NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero - output logic [3*P.NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic [P.FMALEN-1:0] Am, // addend aligned for addition in U(NF+5.2NF+1) output logic ASticky, // Sticky bit calculated from the aliged addend output logic KillProd // should the product be set to zero ); logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*P.NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*P.NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [P.FMALEN+P.NF-1:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [P.FMALEN+P.NF-1:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; // should the addend be killed /////////////////////////////////////////////////////////////////////////////// @@ -56,7 +56,7 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( // | 54'b0 | 
106'b(product) | 2'b0 | // | addnend | - assign ZmPreshifted = {Zm,(3*P.NF+5)'(0)}; + assign ZmPreshifted = {Zm,(P.FMALEN-1)'(0)}; assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero; assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5)); @@ -87,6 +87,6 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( end end - assign Am = ZmShifted[4*P.NF+5:P.NF]; + assign Am = ZmShifted[P.FMALEN+P.NF-1:P.NF]; endmodule diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 8f4297ec0..4cf17890c 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -119,14 +119,14 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // Fma Signals logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying - logic [3*P.NF+5:0] SmE, SmM; // Sum significand + logic [P.FMALEN-1:0] SmE, SmM; // Sum significand logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output logic [P.NE+1:0] SeE,SeM; // Sum exponent logic InvAE, InvAM; // Invert addend logic AsE, AsM; // Addend sign logic PsE, PsM; // Product sign logic SsE, SsM; // Sum sign - logic [$clog2(3*P.NF+7)-1:0] SCntE, SCntM; // LZA sum leading zero count + logic [$clog2(P.FMALEN+1)-1:0] SCntE, SCntM; // LZA sum leading zero count // Cvt Signals logic [P.NE:0] CeE, CeM; // convert intermediate expoent @@ -358,8 +358,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM}); - flopenrc #(3*P.NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); - flopenrc #($clog2(3*P.NF+7)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, + flopenrc #(P.FMALEN) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #($clog2(P.FMALEN+1)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index 5b0f1175b..27f39e2a5 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -30,13 +30,13 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [P.NE+1:0] FmaSe, // sum's exponent - input logic [3*P.NF+5:0] FmaSm, // the positive sum - input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // normalization shift count + input logic [P.FMALEN-1:0] FmaSm, // the positive sum + input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // normalization shift count output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results - output logic FmaSZero, // is the result subnormal - calculated before LZA corection + output logic FmaSZero, // is the sum zero output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection - output logic [$clog2(3*P.NF+7)-1:0] FmaShiftAmt, // normalization shift count - output logic [3*P.NF+7:0] FmaShiftIn // is the sum zero + output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt, // normalization shift count + output logic [P.FMALEN+1:0] FmaShiftIn ); logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias logic [P.NE+1:0] 
BiasCorr; // correction for bias @@ -49,7 +49,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent FmaSe-FmaSCnt+NF+2 - assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+7)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4); + assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(P.FMALEN+1)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4); //convert the sum's exponent into the proper precision if (P.FPSIZES == 1) begin @@ -131,6 +131,6 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // set and calculate the shift input and amount // - shift once if killing a product and the result is subnormal assign FmaShiftIn = {2'b0, FmaSm}; - if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3): FmaSCnt+1; - else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1; + if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1; + else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index aa181c5e0..4e893a82e 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -44,9 +44,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic FmaPs, // the product's sign input logic FmaSs, // Sum sign input logic [P.NE+1:0] FmaSe, // the sum's exponent - input logic [3*P.NF+5:0] FmaSm, // the positive sum + input logic [P.FMALEN-1:0] FmaSm, // the positive sum input logic FmaASticky, // sticky bit that is calculated during alignment - input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // the normalization shift count + input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivSticky, // divider sticky bit input logic [P.NE+1:0] DivUe, // divsqrt exponent @@ -70,8 +70,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( logic Rs; // result sign logic [P.NF-1:0] Rf; // Result fraction logic [P.NE-1:0] Re; // Result exponent - logic Ms; // norMalized sign - logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction + logic Ms; // normalized sign + logic [P.NORMSHIFTSZ-1:0] Mf; // normalized fraction logic [P.NE+1:0] Me; // normalized exponent logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow logic UfPlus1; // do you add one (for determining underflow flag) @@ -86,10 +86,10 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // fma signals logic [P.NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero - logic [3*P.NF+7:0] FmaShiftIn; // fma shift input + logic [P.FMALEN+1:0] FmaShiftIn; // fma shift input logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection - logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma + logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt; // normalization shift amount for fma // division signals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input @@ -154,8 +154,8 @@ module postprocess import cvw::*; #(parameter cvw_t 
P) ( always_comb case(PostProcSel) 2'b10: begin // fma - ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt}; - ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+8){1'b0}}}; + ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.FMALEN-1){1'b0}}, FmaShiftAmt}; + ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}}; end 2'b00: begin // cvt ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt}; diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index 15c9b4538..c99d5185c 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -32,7 +32,7 @@ module round import cvw::*; #(parameter cvw_t P) ( input logic [2:0] Frm, // rounding mode input logic [1:0] PostProcSel, // select the postprocessor output input logic Ms, // normalized sign - input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction + input logic [P.NORMSHIFTSZ-1:0] Mf, // normalized fraction // fma input logic FmaOp, // is an fma operation being done? input logic [P.NE+1:0] FmaMe, // exponent of the normalized sum for fma @@ -123,61 +123,61 @@ module round import cvw::*; #(parameter cvw_t P) ( // | NF |1|1| // ^ ^ if floating point result // ^ if not an FMA result - if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); + if (XLENPOS == 1)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN - if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 2)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&IntRes) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); end else if (P.FPSIZES == 2) begin // XLEN is either 64 or 32 // so half and single are always smaller then XLEN // 1: XLEN > NF > NF1 - if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); + if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~OutFmt) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&IntRes) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); end else if (P.FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]); + if (XLENPOS == 1) assign NormSticky = 
(|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]); // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); // 3: NF > NF1 > XLEN - if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.NF-2:0]); end else if (P.FPSIZES == 4) begin // Quad precision will always be greater than XLEN // 2: NF > XLEN > NF1 - if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | - (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | - (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); + if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | + (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | + (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]); // 3: NF > NF1 > XLEN // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | - (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | - (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | - (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]); + if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) | + (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | + (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) | + (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]); end @@ -188,32 +188,32 @@ module round import 
cvw::*; #(parameter cvw_t P) ( // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (P.FPSIZES == 1) begin - assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; - assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; - assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; + assign FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1]; + assign FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF]; + assign FpRound = Mf[P.NORMSHIFTSZ-P.NF-2]; end else if (P.FPSIZES == 2) begin - assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1]; - assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1]; - assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2]; + assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-1] : Mf[P.NORMSHIFTSZ-P.NF1-1]; + assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF] : Mf[P.NORMSHIFTSZ-P.NF1]; + assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-2] : Mf[P.NORMSHIFTSZ-P.NF1-2]; end else if (P.FPSIZES == 3) begin always_comb case (OutFmt) P.FMT: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.NF-2]; end P.FMT1: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1]; - FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.NF1-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF1]; + FpRound = Mf[P.NORMSHIFTSZ-P.NF1-2]; end P.FMT2: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2]; - FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.NF2-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF2]; + FpRound = Mf[P.NORMSHIFTSZ-P.NF2-2]; end default: begin FpGuard = 1'bx; @@ -225,31 +225,31 @@ module round import cvw::*; #(parameter cvw_t P) ( always_comb case (OutFmt) 2'h3: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.Q_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.Q_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.Q_NF-2]; end 2'h1: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.D_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.D_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.D_NF-2]; end 2'h0: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.S_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.S_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.S_NF-2]; end 2'h2: begin - FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1]; - FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF]; - FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2]; + FpGuard = Mf[P.NORMSHIFTSZ-P.H_NF-1]; + FpLsbRes = Mf[P.NORMSHIFTSZ-P.H_NF]; + FpRound = Mf[P.NORMSHIFTSZ-P.H_NF-2]; end endcase end - assign Guard = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard; - assign LsbRes = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes; - assign Round = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound; + assign Guard = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-1] : FpGuard; + assign LsbRes = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN] : FpLsbRes; + assign Round = CvtToInt ? 
Mf[P.NORMSHIFTSZ-P.XLEN-2] : FpRound; always_comb begin // Determine if you add 1 @@ -296,7 +296,7 @@ module round import cvw::*; #(parameter cvw_t P) ( assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)}; // trim unneeded bits from fraction - assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF]; + assign RoundFrac = Mf[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.NF]; // select the exponent always_comb diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index 85e96c744..ad811a747 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -41,11 +41,11 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( input logic FmaSZero, // output output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum - output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum after correction + output logic [P.NORMSHIFTSZ-1:0] Mf, // the shifted sum after correction output logic [P.NE+1:0] Ue // corrected exponent for divider ); - logic [P.CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction + logic [P.NORMSHIFTSZ-1:0] CorrShifted; // the shifted sum after LZA correction logic ResSubnorm; // is the result Subnormal logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction logic LeftShiftQm; // should the divsqrt result be shifted one to the left @@ -69,12 +69,12 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( assign RightShift = FmaOp ? LZAPlus1 : LeftShiftQm; // one bit right shift for FMA or division - mux2 #(P.NORMSHIFTSZ-2) corrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], RightShift, CorrShifted); + mux2 #(P.NORMSHIFTSZ) corrmux({Shifted[P.NORMSHIFTSZ-3:0], 2'b00}, {Shifted[P.NORMSHIFTSZ-2:1], 2'b00}, RightShift, CorrShifted); // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits always_comb if (FmaOp | (DivOp & ~DivResSubnorm)) Mf = CorrShifted; - else Mf = Shifted[P.NORMSHIFTSZ-1:2]; + else Mf = Shifted[P.NORMSHIFTSZ-1:0]; // Determine sum's exponent // main exponent issues: diff --git a/testbench/testbench_fp.sv b/testbench/testbench_fp.sv index 75be5ca1e..f800a9fed 100644 --- a/testbench/testbench_fp.sv +++ b/testbench/testbench_fp.sv @@ -98,8 +98,8 @@ module testbench_fp; logic [P.NE+1:0] Se; logic ASticky; logic KillProd; - logic [$clog2(3*P.NF+7)-1:0] SCnt; - logic [3*P.NF+5:0] Sm; + logic [$clog2(P.FMALEN+1)-1:0] SCnt; + logic [P.FMALEN-1:0] Sm; logic InvA; logic NegSum; logic As; From c649cfba8377258d3b681e53a39f3bde352561e1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 14 May 2024 10:28:31 -0700 Subject: [PATCH 4/5] Expanded fpcalc to support quad --- examples/fp/fpcalc/Makefile | 8 ++-- examples/fp/fpcalc/fpcalc.c | 94 +++++++++++++++++++++++++++++++++---- 2 files changed, 88 insertions(+), 14 deletions(-) diff --git a/examples/fp/fpcalc/Makefile b/examples/fp/fpcalc/Makefile index 196fdf3d2..e3165231b 100644 --- a/examples/fp/fpcalc/Makefile +++ b/examples/fp/fpcalc/Makefile @@ -2,14 +2,12 @@ CC = gcc CFLAGS = -O3 -Wno-format-overflow -LIBS = -lm -LFLAGS = -L. 
# Link against the riscv-isa-sim version of SoftFloat rather than # the regular version to get RISC-V NaN behavior IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat -LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a +LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm -lquadmath #IFLAGS = -I../../../addins/SoftFloat-3e/source/include/ -#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a +#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a -lm -lquadmath SRCS = $(wildcard *.c) PROGS = $(patsubst %.c,%,$(SRCS)) @@ -17,7 +15,7 @@ PROGS = $(patsubst %.c,%,$(SRCS)) all: $(PROGS) %: %.c - $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS) + $(CC) $(CFLAGS) -DSOFTFLOAT_FAST_INT64 $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS) clean: rm -f $(PROGS) diff --git a/examples/fp/fpcalc/fpcalc.c b/examples/fp/fpcalc/fpcalc.c index 94bfc9ac1..5a075b69c 100644 --- a/examples/fp/fpcalc/fpcalc.c +++ b/examples/fp/fpcalc/fpcalc.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include // GCC Quad-Math Library #include "softfloat.h" #include "softfloat_types.h" @@ -26,6 +28,12 @@ typedef union dp { double d; } dp; +typedef union qp { + uint64_t v64[2]; + __uint128_t v; + __float128 q; +} qp; + int opSize = 0; @@ -140,6 +148,27 @@ void printF64(char *msg, float64_t f) { // msg, conv.v, conv.d, sci, exp, fract); } +void printF128 (char *msg, float128_t q) { + qp conv; + //__int128_t v128; + int i, j; + char buf[64]; + //v128 = q.v[1]; + //v128 = v128 << 64 | q.v[0]; // use union to convert between hexadecimal and floating-point views + //conv.v = v128; + conv.v64[0] = q.v[0]; // use union to convert between hexadecimal and floating-point views + conv.v64[1] = q.v[1]; // use union to convert between hexadecimal and floating-point views + printf("%s: ", msg); // print out nicely + + // Some compilers can understand %Q for printf on quad precision instead of the + // API call of quadmath_snprintf + // printf("0x%016" PRIx64 "_%016" PRIx64 " = %1.15Qe\n", q.v[1], q.v[0], conv.q); + quadmath_snprintf (buf, sizeof buf, "%1.15Qe", conv.q); + //printf("0x%032" PRIx12 " = %s\n", q.v, buf); + printf("0x%016" PRIx64 "_%016" PRIx64 " = %s\n", q.v[1], q.v[0], buf); + +} + void printFlags(void) { int NX = softfloat_exceptionFlags % 2; int UF = (softfloat_exceptionFlags >> 1) % 2; @@ -160,14 +189,32 @@ void softfloatInit(void) { softfloat_detectTininess = softfloat_tininess_afterRounding; // RISC-V behavior for tininess } -uint64_t parseNum(char *num) { - uint64_t result; +__uint128_t strtoul128(char *num, int base) { + __uint128_t result = 0; + int i; + for (i=0; i= '0' && num[i] <= '9') result += num[i] - '0'; + else if (num[i] >= 'a' && num[i] <= 'f') result += num[i] - 'a' + 10; + else if (num[i] >= 'A' && num[i] <= 'F') result += num[i] - 'A' + 10; + else { + printf("Error: bad character %c in number %s\n", num[i], num); + exit(1); + } + } + return result; +} + +__uint128_t parseNum(char *num) { +// uint64_t result; + __uint128_t result; int size; // size of operands in bytes (2= half, 4=single, 8 = double) if (strlen(num) < 8) size = 2; else if (strlen(num) < 16) size = 4; - else if (strlen(num) < 19) size = 8; + else if (strlen(num) < 32) size = 8; + else if (strlen(num) < 35) size = 16; // *** will need to increase else { - printf("Error: only half, single, and double precision supported"); + printf("Error: only half, single, double, or quad precision supported"); exit(1); } if (opSize != 0) { @@ -179,7 +226,7 @@ uint64_t parseNum(char *num) { opSize = size; //printf ("Operand size 
is %d\n", opSize); } - result = (uint64_t)strtoul(num, NULL, 16); + result = (__uint128_t)strtoul128(num, 16); //printf("Parsed %s as 0x%lx\n", num, result); return result; } @@ -206,7 +253,8 @@ char parseRound(char *rnd) { int main(int argc, char *argv[]) { - uint64_t xn, yn, zn; + //uint64_t xn, yn, zn; + __uint128_t xn, yn, zn; char op1, op2; char cmd[200]; @@ -217,6 +265,7 @@ int main(int argc, char *argv[]) exit(1); } else { softfloat_roundingMode = softfloat_round_near_even; + //printf("argv[0] = %s arvg[1] = %s argv[2] = %s argv[3] = %s\n", argv[0], argv[1], argv[2], argv[3]); xn = parseNum(argv[1]); yn = parseNum(argv[3]); op1 = parseOp(argv[2]); @@ -241,12 +290,22 @@ int main(int argc, char *argv[]) r = f32_mulAdd(x, y, z); printF32("X", x); printF32("Y", y); printF32("Z", z); printF32("result = X*Y+Z", r); printFlags(); - } else { // opSize = 8 + } else if (opSize == 8) { float64_t x, y, z, r; x.v = xn; y.v = yn; z.v = zn; r = f64_mulAdd(x, y, z); printF64("X", x); printF64("Y", y); printF64("Z", z); printF64("result = X*Y+Z", r); printFlags(); + } else { // opSize = 16 + float128_t x, y, z, r; + qp xc, yc, zc; + xc.v = xn; yc.v = yn; zc.v = zn; + x.v[0] = xc.v64[0]; x.v[1] = xc.v64[1]; + y.v[0] = yc.v64[0]; y.v[1] = yc.v64[1]; + z.v[0] = zc.v64[0]; z.v[1] = zc.v64[1]; + r = f128_mulAdd(x, y, z); + printF128("X", x); printF128("Y", y); printF128("Z", z); + printF128("result = X*Y+Z", r); printFlags(); } } } else { @@ -279,7 +338,7 @@ int main(int argc, char *argv[]) sprintf(cmd, "0x%08x %c 0x%08x", x.v, op1, y.v); printF32(cmd, r); printFlags(); - } else { // opSize = 8 + } else if (opSize == 8) { // opSize = 8 float64_t x, y, r; x.v = xn; y.v = yn; switch (op1) { @@ -293,7 +352,24 @@ int main(int argc, char *argv[]) printF64("X", x); printF64("Y", y); sprintf(cmd, "0x%016lx %c 0x%016lx", x.v, op1, y.v); printF64(cmd, r); printFlags(); - + } else { // opSize = 16 + float128_t x, y, r; + qp xc, yc; + xc.v = xn; yc.v = yn; + x.v[0] = xc.v64[0]; x.v[1] = xc.v64[1]; + y.v[0] = yc.v64[0]; y.v[1] = yc.v64[1]; + //x.v = xn; y.v = yn; + switch (op1) { + case 'x': r = f128_mul(x, y); break; + case '+': r = f128_add(x, y); break; + case '-': r = f128_sub(x, y); break; + case '/': r = f128_div(x, y); break; + case '%': r = f128_rem(x, y); break; + default: printf("Unknown op %c\n", op1); exit(1); + } + printF128("X", x); printF128("Y", y); + //sprintf(cmd, "0x%016lx %c 0x%016lx", x.v, op1, y.v); + printF128("Result", r); printFlags(); } } } From 990d40410bd248f3a87326117c32e897131142be Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 14 May 2024 11:11:24 -0700 Subject: [PATCH 5/5] Test using fpcalc for fp_dataset.py --- examples/fp/fpcalc/fpcalc.c | 21 ++++++++++- tests/fp/quad/fp_dataset.py | 69 +++++++++++++++++++++++++---------- tests/fp/quad/fpdatasetgen.py | 6 ++- 3 files changed, 74 insertions(+), 22 deletions(-) mode change 100644 => 100755 tests/fp/quad/fp_dataset.py mode change 100644 => 100755 tests/fp/quad/fpdatasetgen.py diff --git a/examples/fp/fpcalc/fpcalc.c b/examples/fp/fpcalc/fpcalc.c index 5a075b69c..8264b1442 100644 --- a/examples/fp/fpcalc/fpcalc.c +++ b/examples/fp/fpcalc/fpcalc.c @@ -169,6 +169,24 @@ void printF128 (char *msg, float128_t q) { } +void printF128val(float128_t q) { + qp conv; + //__int128_t v128; + int i, j; + char buf[64]; + //v128 = q.v[1]; + //v128 = v128 << 64 | q.v[0]; // use union to convert between hexadecimal and floating-point views + //conv.v = v128; + conv.v64[0] = q.v[0]; // use union to convert between hexadecimal and floating-point 
views + conv.v64[1] = q.v[1]; // use union to convert between hexadecimal and floating-point views + + // Some compilers can understand %Q for printf on quad precision instead of the + // API call of quadmath_snprintf + // printf("0x%016" PRIx64 "_%016" PRIx64 " = %1.15Qe\n", q.v[1], q.v[0], conv.q); + //quadmath_snprintf (buf, sizeof buf, "%1.15Qe", conv.q); + printf("%016" PRIx64 "%016" PRIx64 "\n", q.v[1], q.v[0]); +} + void printFlags(void) { int NX = softfloat_exceptionFlags % 2; int UF = (softfloat_exceptionFlags >> 1) % 2; @@ -369,7 +387,8 @@ int main(int argc, char *argv[]) } printF128("X", x); printF128("Y", y); //sprintf(cmd, "0x%016lx %c 0x%016lx", x.v, op1, y.v); - printF128("Result", r); printFlags(); + printF128(cmd, r); printFlags(); + printF128val(r); } } } diff --git a/tests/fp/quad/fp_dataset.py b/tests/fp/quad/fp_dataset.py old mode 100644 new mode 100755 index 0b0bd32ca..d095d58f7 --- a/tests/fp/quad/fp_dataset.py +++ b/tests/fp/quad/fp_dataset.py @@ -5,6 +5,7 @@ import random import sys import math from decimal import * +import os sys.set_int_max_str_digits(10000) fzero = ['0x00000000', '0x80000000'] @@ -347,6 +348,35 @@ def comments_parser(coverpoints): cvpts.append((cvpt+ " #nosat",comment)) return cvpts +def softfloat_sub(a, b): + cmd = "$WALLY/examples/fp/fpcalc/fpcalc " + a + " - " + b + result = os.system(cmd) + print("cmd = ", cmd, "returns result = ", result) + return result + +# rs1, rs3, result are hexadecimal strings +def gen_rs2(iflen,opcode,rs1,rs3,result): + if opcode in 'fadd': + rs2 = softfloat_sub(result, rs1) + elif opcode in 'fsub': + rs2 = rs1 - fields_dec_converter(iflen,result[i][0]) + elif opcode in 'fmul': + rs2 = fields_dec_converter(iflen,result[i][0])/rs1 + elif opcode in 'fdiv': + if fields_dec_converter(iflen,result[i][0]) != 0: + rs2 = rs1/fields_dec_converter(iflen,result[i][0]) + elif opcode in 'fsqrt': + rs2 = fields_dec_converter(iflen,result[i][0])*fields_dec_converter(iflen,result[i][0]) + elif opcode in 'fmadd': + rs2 = (fields_dec_converter(iflen,result[i][0]) - rs3)/rs1 + elif opcode in 'fnmadd': + rs2 = (rs3 - fields_dec_converter(iflen,result[i][0]))/rs1 + elif opcode in 'fmsub': + rs2 = (fields_dec_converter(iflen,result[i][0]) + rs3)/rs1 + elif opcode in 'fnmsub': + rs2 = -1*(rs3 + fields_dec_converter(iflen,result[i][0]))/rs1 + return rs2 + def ibm_b1(flen, iflen, opcode, ops): ''' IBM Model B1 Definition: @@ -525,25 +555,26 @@ def ibm_b2(flen, iflen, opcode, ops, int_val = 100, seed = -1): #***Quad support rs1 = fields_dec_converter(iflen,'0x'+hex(int('1'+rs1_bin[2:],2))[3:]) #print(rs1) rs3 = fields_dec_converter(iflen,'0x'+hex(int('1'+rs3_bin[2:],2))[3:]) - if opcode in 'fadd': - rs2 = fields_dec_converter(iflen,result[i][0]) - rs1 - elif opcode in 'fsub': - rs2 = rs1 - fields_dec_converter(iflen,result[i][0]) - elif opcode in 'fmul': - rs2 = fields_dec_converter(iflen,result[i][0])/rs1 - elif opcode in 'fdiv': - if fields_dec_converter(iflen,result[i][0]) != 0: - rs2 = rs1/fields_dec_converter(iflen,result[i][0]) - elif opcode in 'fsqrt': - rs2 = fields_dec_converter(iflen,result[i][0])*fields_dec_converter(iflen,result[i][0]) - elif opcode in 'fmadd': - rs2 = (fields_dec_converter(iflen,result[i][0]) - rs3)/rs1 - elif opcode in 'fnmadd': - rs2 = (rs3 - fields_dec_converter(iflen,result[i][0]))/rs1 - elif opcode in 'fmsub': - rs2 = (fields_dec_converter(iflen,result[i][0]) + rs3)/rs1 - elif opcode in 'fnmsub': - rs2 = -1*(rs3 + fields_dec_converter(iflen,result[i][0]))/rs1 + rs2 = 
gen_rs2(iflen,opcode,"3FFF8000000000000000000000000000","3FFF4000000000000000000000000000","3FFF8800000000000000000000000000") + # if opcode in 'fadd': + # rs2 = fields_dec_converter(iflen,result[i][0]) - rs1 + # elif opcode in 'fsub': + # rs2 = rs1 - fields_dec_converter(iflen,result[i][0]) + # elif opcode in 'fmul': + # rs2 = fields_dec_converter(iflen,result[i][0])/rs1 + # elif opcode in 'fdiv': + # if fields_dec_converter(iflen,result[i][0]) != 0: + # rs2 = rs1/fields_dec_converter(iflen,result[i][0]) + # elif opcode in 'fsqrt': + # rs2 = fields_dec_converter(iflen,result[i][0])*fields_dec_converter(iflen,result[i][0]) + # elif opcode in 'fmadd': + # rs2 = (fields_dec_converter(iflen,result[i][0]) - rs3)/rs1 + # elif opcode in 'fnmadd': + # rs2 = (rs3 - fields_dec_converter(iflen,result[i][0]))/rs1 + # elif opcode in 'fmsub': + # rs2 = (fields_dec_converter(iflen,result[i][0]) + rs3)/rs1 + # elif opcode in 'fnmsub': + # rs2 = -1*(rs3 + fields_dec_converter(iflen,result[i][0]))/rs1 if(iflen==32): m = struct.unpack('f', struct.pack('f', rs2))[0] diff --git a/tests/fp/quad/fpdatasetgen.py b/tests/fp/quad/fpdatasetgen.py old mode 100644 new mode 100755 index 3d49e7082..6208b1ba3 --- a/tests/fp/quad/fpdatasetgen.py +++ b/tests/fp/quad/fpdatasetgen.py @@ -1,7 +1,9 @@ +#!/usr/bin/python + from fp_dataset import * #coverpoints=ibm_b1(128, 128, 'fadd.q', 2) #ibm_b1(flen, iflen, opcode, ops) -#coverpoints=ibm_b2(128,128,'fadd.q',2) #ibm_b2(flen, iflen, opcode, ops, int_val = 100, seed = -1) -coverpoints=ibm_b2(32,32,'fadd.s',2) #ibm_b2(flen, iflen, opcode, ops,seed = -1) +coverpoints=ibm_b2(128,128,'fadd.q',2) #ibm_b2(flen, iflen, opcode, ops, int_val = 100, seed = -1) +#coverpoints=ibm_b2(32,32,'fadd.s',2) #ibm_b2(flen, iflen, opcode, ops,seed = -1) #print(coverpoints) #quad_precision_hex = "0x3ff00000000000000000000000000001" # Example quad precision hexadecimal value #quad_precision_dec = fields_dec_converter(128, quad_precision_hex)
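
A note on the width changes in patches 2-3: the FMALEN cleanup is a pure renaming, since every vector width the diffs touch is an affine function of NF, so substituting FMALEN = 3*NF + 6 cannot change any size. A quick sanity check of the identities the diffs rely on, over all four supported fraction widths (a sketch, independent of any particular Wally config):

    # Check that each FMALEN-based width in patches 2-3 equals the
    # 3*NF+k expression it replaces (half/single/double/quad NF values).
    def clog2(x):                            # SystemVerilog $clog2 for x >= 1
        return (x - 1).bit_length()

    for NF in (10, 23, 52, 112):
        FMALEN = 3 * NF + 6                  # config-shared.vh
        assert FMALEN - 1 == 3 * NF + 5      # Sm/Am/AmInv msb (fma.sv)
        assert FMALEN + 2 == 3 * NF + 8      # FMA term of NORMSHIFTSZ, FmaShiftIn msb+1
        assert FMALEN + NF - 1 == 4 * NF + 5 # ZmShifted msb (fmaalign.sv)
        assert clog2(FMALEN + 1) == clog2(3 * NF + 7)  # SCnt width (fma.sv, fpu.sv)
        assert clog2(FMALEN - 1) == clog2(3 * NF + 5)  # FmaShiftAmt slices (fmashiftcalc.sv)
        # widening FmaShiftAmt's declaration from clog2(3*NF+5) to
        # clog2(FMALEN+1) bits is also a no-op for these NF values:
        assert clog2(FMALEN - 1) == clog2(FMALEN + 1)
    print("all FMALEN width identities hold")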
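
Patch 3 also retires CORRSHIFTSZ (= NORMSHIFTSZ-2) by making Mf a full NORMSHIFTSZ-bit vector: shiftcorrection's corrmux now pads two zero bits below CorrShifted, and every Mf index in round.sv moves from CORRSHIFTSZ-k to NORMSHIFTSZ-k, i.e. up by exactly the two padded bits. The LSB, guard, and round selections therefore land on the same bits as before, and the two extra zeros only ever feed a sticky OR. A toy model of that bookkeeping for the single-format (FPSIZES == 1) case (a sketch with made-up widths, not the RTL):

    import random

    NORMSHIFTSZ = 20          # toy width; the real one comes from config-shared.vh
    CORRSHIFTSZ = NORMSHIFTSZ - 2
    NF = 7                    # toy fraction width

    def bit(v, i):
        return (v >> i) & 1

    random.seed(1)
    for _ in range(1000):
        old_mf = random.getrandbits(CORRSHIFTSZ)  # pre-patch Mf
        new_mf = old_mf << 2                      # post-patch Mf: two zero LSBs
        for k in (NF - 1, NF, NF + 1):            # LSB, guard, round as in round.sv
            assert bit(old_mf, CORRSHIFTSZ - k - 1) == bit(new_mf, NORMSHIFTSZ - k - 1)
        # sticky = OR of everything below the round bit; the padded zeros vanish
        old_sticky = (old_mf & ((1 << (CORRSHIFTSZ - NF - 2)) - 1)) != 0
        new_sticky = (new_mf & ((1 << (NORMSHIFTSZ - NF - 2)) - 1)) != 0
        assert old_sticky == new_sticky
    print("CORRSHIFTSZ -> NORMSHIFTSZ re-indexing preserves L/G/R/T")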
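
Patch 4 extends fpcalc to quad by carrying operands as __uint128_t: strtoul128() folds up to 32 hex digits into one value, and the qp union exposes it as the two 64-bit halves that SoftFloat's float128_t expects, low word in v[0]. The same bookkeeping in Python, handy for cross-checking operand encodings (a sketch; the two constants are the quad values 1.5 and 1.25 hard-coded in patch 5's gen_rs2 test call):

    # Mirror strtoul128() plus the qp union split used by fpcalc.c:
    # float128_t.v[0] gets the low 64 bits, v[1] the high 64 bits.
    def split128(hexstr):
        v = int(hexstr, 16)                      # what strtoul128(num, 16) computes
        assert v < 1 << 128
        return v & 0xFFFFFFFFFFFFFFFF, v >> 64   # (v64[0], v64[1])

    lo, hi = split128("3FFF8000000000000000000000000000")   # quad 1.5
    assert (hi, lo) == (0x3FFF800000000000, 0)
    lo, hi = split128("3FFF4000000000000000000000000000")   # quad 1.25
    assert (hi, lo) == (0x3FFF400000000000, 0)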
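
Finally, patch 5's softfloat_sub() builds the right fpcalc command line but captures it with os.system(), which returns the process exit status rather than anything printed; the bare-hex printF128val() line added to fpcalc.c looks like the intended machine-readable channel, since it comes out last on stdout. One way gen_rs2 could actually read it (a sketch: softfloat_op and its output parsing are hypothetical, not part of the patch, and only the fadd branch is wired up, matching the patch's test call):

    import os
    import subprocess

    def softfloat_op(a, op, b):
        # a, b: quad operands as hex strings; op: '+', '-', 'x', '/', or '%'
        fpcalc = os.path.expandvars("$WALLY/examples/fp/fpcalc/fpcalc")
        r = subprocess.run([fpcalc, a, op, b],
                           capture_output=True, text=True, check=True)
        return r.stdout.strip().splitlines()[-1]  # bare hex from printF128val()

    def gen_rs2(iflen, opcode, rs1, rs3, result):
        if opcode in 'fadd':                      # rs2 = result - rs1, in quad
            return softfloat_op(result, '-', rs1)
        raise NotImplementedError("remaining branches still use the "
                                  "commented-out decimal math")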