diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 25d0d8c65..d991b4b32 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -118,7 +118,7 @@ localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); // RV32F: max(32+23+1, 2(23)+4, 3(23)+6) = 3*23+6 = 75 // RV64F: max(64+23+1, 64 + 23 + 2, 3*23+6) = 89 // RV64D: max(84+52+1, 64+52+2, 3*52+6) = 162 -localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+6)); +localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+8)); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits diff --git a/src/fpu/fma/fma.sv b/src/fpu/fma/fma.sv index bdf2898f7..3576b95df 100644 --- a/src/fpu/fma/fma.sv +++ b/src/fpu/fma/fma.sv @@ -34,13 +34,13 @@ module fma import cvw::*; #(parameter cvw_t P) ( input logic XZero, YZero, ZZero, // is the input zero input logic [2:0] OpCtrl, // operation control output logic ASticky, // sticky bit that is calculated during alignment - output logic [3*P.NF+3:0] Sm, // the positive sum's significand + output logic [3*P.NF+5:0] Sm, // the positive sum's significand output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other operations) output logic Ps, // the product's sign output logic Ss, // the sum's sign output logic [P.NE+1:0] Se, // the sum's exponent - output logic [$clog2(3*P.NF+5)-1:0] SCnt // normalization shift count + output logic [$clog2(3*P.NF+7)-1:0] SCnt // normalization shift count ); // OpCtrl: @@ -54,8 +54,8 @@ module fma import cvw::*; #(parameter cvw_t P) ( // 111 - sub logic [2*P.NF+1:0] Pm; // the product's significand in U(2.2Nf) format - logic [3*P.NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) - logic [3*P.NF+3:0] AmInv; // aligned addend's mantissa possibly inverted + logic 
[3*P.NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) + logic [3*P.NF+5:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*P.NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) logic KillProd; // set the product to zero before addition if the product is too small to matter logic [P.NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign @@ -89,6 +89,6 @@ module fma import cvw::*; #(parameter cvw_t P) ( fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); - fmalza #(3*P.NF+4, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); + fmalza #(3*P.NF+6, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); endmodule diff --git a/src/fpu/fma/fmaadd.sv b/src/fpu/fma/fmaadd.sv index 00951ee10..995494f2c 100644 --- a/src/fpu/fma/fmaadd.sv +++ b/src/fpu/fma/fmaadd.sv @@ -28,7 +28,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fmaadd import cvw::*; #(parameter cvw_t P) ( - input logic [3*P.NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [3*P.NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) input logic [P.NE-1:0] Ze, // exponent of Z input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other operations) input logic [P.NE+1:0] Pe, // product's exponet @@ -36,14 +36,14 @@ module fmaadd import cvw::*; #(parameter cvw_t P) ( input logic InvA, // invert the aligned addend input logic KillProd, // should the product be set to 0 input logic ASticky, // Alighed addend's sticky bit - output logic [3*P.NF+3:0] AmInv, // aligned addend possibly inverted + output logic [3*P.NF+5:0] AmInv, // aligned addend possibly inverted output logic [2*P.NF+1:0] PmKilled, // the product's mantissa possibly killed 
output logic Ss, // sum's sign output logic [P.NE+1:0] Se, // sum's exponent - output logic [3*P.NF+3:0] Sm // the positive sum + output logic [3*P.NF+5:0] Sm // the positive sum ); - logic [3*P.NF+3:0] PreSum, NegPreSum; // possibly negative sum + logic [3*P.NF+5:0] PreSum, NegPreSum; // possibly negative sum logic NegSum; // was the sum negative /////////////////////////////////////////////////////////////////////////////// @@ -62,8 +62,8 @@ module fmaadd import cvw::*; #(parameter cvw_t P) ( // addend - prod where product is killed (and not exactly zero) then don't add +1 from negation // ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA // in this case this result is only ever selected when InvA=1 so we can remove &InvA - assign {NegSum, PreSum} = {{P.NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*P.NF+4{1'b0}}, (~ASticky|KillProd)&InvA}; - assign NegPreSum = Am + {{P.NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*P.NF+2)'(0), ~ASticky|~KillProd, 1'b0}; + assign {NegSum, PreSum} = {{P.NF+3{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*P.NF+5{1'b0}}, (~ASticky|KillProd)&InvA}; + assign NegPreSum = Am + {{P.NF+2{1'b1}}, ~PmKilled, 2'b0} + {(3*P.NF+3)'(0), ~ASticky|~KillProd, 2'b0}; // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? 
NegPreSum : PreSum; diff --git a/src/fpu/fma/fmaalign.sv b/src/fpu/fma/fmaalign.sv index c1d22ec48..4fc796fda 100644 --- a/src/fpu/fma/fmaalign.sv +++ b/src/fpu/fma/fmaalign.sv @@ -31,14 +31,14 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( input logic [P.NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [P.NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero - output logic [3*P.NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic [3*P.NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) output logic ASticky, // Sticky bit calculated from the aliged addend output logic KillProd // should the product be set to zero ); logic [P.NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*P.NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*P.NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [4*P.NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*P.NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) logic KillZ; // should the addend be killed /////////////////////////////////////////////////////////////////////////////// @@ -49,36 +49,37 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( // - negative means Z is larger, so shift Z left // - positive means the product is larger, so shift Z right // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed - assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+2) - {2'b0, Ze}; + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+3) - {2'b0, Ze}; // Default Addition with only inital left shift - // | 53'b0 | 106'b(product) | 1'b0 | + // extra bit at end and beginning so the correct guard bit is calculated when subtracting + // | 
54'b0 | 106'b(product) | 2'b0 | // | addnend | - assign ZmPreshifted = {Zm,(3*P.NF+3)'(0)}; + assign ZmPreshifted = {Zm,(3*P.NF+5)'(0)}; assign KillProd = (ACnt[P.NE+1]&~ZZero)|XZero|YZero; - assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(3)); + assign KillZ = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5)); always_comb begin // If the product is too small to effect the sum, kill the product - // | 53'b0 | 106'b(product) | 1'b0 | + // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | if (KillProd) begin - ZmShifted = {(P.NF+2)'(0), Zm, (2*P.NF+1)'(0)}; + ZmShifted = {(P.NF+3)'(0), Zm, (2*P.NF+2)'(0)}; ASticky = ~(XZero|YZero); // If the addend is too small to effect the addition // - The addend has to shift two past the end of the product to be considered too small // - The 2 extra bits are needed for rounding - // | 53'b0 | 106'b(product) | 1'b0 | + // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | end else if (KillZ) begin ZmShifted = '0; ASticky = ~ZZero; // If the Addend is shifted right - // | 53'b0 | 106'b(product) | 1'b0 | + // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | end else begin ZmShifted = ZmPreshifted >> ACnt; @@ -86,6 +87,6 @@ module fmaalign import cvw::*; #(parameter cvw_t P) ( end end - assign Am = ZmShifted[4*P.NF+3:P.NF]; + assign Am = ZmShifted[4*P.NF+5:P.NF]; endmodule diff --git a/src/fpu/fma/fmalza.sv b/src/fpu/fma/fmalza.sv index 822f857c2..01439f4d1 100644 --- a/src/fpu/fma/fmalza.sv +++ b/src/fpu/fma/fmalza.sv @@ -41,7 +41,7 @@ module fmalza #(WIDTH, NF) ( logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left 1 - assign B = {{(NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product + assign B = {{(NF+2){1'b0}}, Pm, 2'b0}; // Zero extend product assign P = A^B; assign G = A&B; diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 22c650ed8..a250827bc 100755 --- a/src/fpu/fpu.sv +++ 
b/src/fpu/fpu.sv @@ -116,14 +116,14 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // Fma Signals logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying - logic [3*P.NF+3:0] SmE, SmM; // Sum significand + logic [3*P.NF+5:0] SmE, SmM; // Sum significand logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output logic [P.NE+1:0] SeE,SeM; // Sum exponent logic InvAE, InvAM; // Invert addend logic AsE, AsM; // Addend sign logic PsE, PsM; // Product sign logic SsE, SsM; // Sum sign - logic [$clog2(3*P.NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count + logic [$clog2(3*P.NF+7)-1:0] SCntE, SCntM; // LZA sum leading zero count // Cvt Signals logic [P.NE:0] CeE, CeM; // convert intermediate expoent @@ -351,8 +351,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); - flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, + flopenrc #(3*P.NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #($clog2(3*P.NF+7)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index 5c611a7e3..5b0f1175b 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -30,13 +30,13 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [P.NE+1:0] FmaSe, // sum's 
exponent - input logic [3*P.NF+3:0] FmaSm, // the positive sum - input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // normalization shift count + input logic [3*P.NF+5:0] FmaSm, // the positive sum + input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // normalization shift count output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results output logic FmaSZero, // is the result subnormal - calculated before LZA corection output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection - output logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt, // normalization shift count - output logic [3*P.NF+5:0] FmaShiftIn // is the sum zero + output logic [$clog2(3*P.NF+7)-1:0] FmaShiftAmt, // normalization shift count + output logic [3*P.NF+7:0] FmaShiftIn // fma shift input (sum with two leading zeros) ); logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias logic [P.NE+1:0] BiasCorr; // correction for bias @@ -48,8 +48,8 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); - // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3); + // calculate the sum's exponent FmaSe-FmaSCnt+NF+3 + assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+7)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4); //convert the sum's exponent into the proper precision if (P.FPSIZES == 1) begin @@ -131,6 +131,6 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // set and calculate the shift input and amount // - shift once if killing a product and the result is subnormal assign FmaShiftIn = {2'b0, FmaSm}; - if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2): FmaSCnt+1; - else assign FmaShiftAmt = FmaPreResultSubnorm ? 
FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1; + if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3): FmaSCnt+1; + else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index 17dda38a0..aa181c5e0 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -44,9 +44,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic FmaPs, // the product's sign input logic FmaSs, // Sum sign input logic [P.NE+1:0] FmaSe, // the sum's exponent - input logic [3*P.NF+3:0] FmaSm, // the positive sum + input logic [3*P.NF+5:0] FmaSm, // the positive sum input logic FmaASticky, // sticky bit that is calculated during alignment - input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count + input logic [$clog2(3*P.NF+7)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivSticky, // divider sticky bit input logic [P.NE+1:0] DivUe, // divsqrt exponent @@ -86,7 +86,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // fma signals logic [P.NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero - logic [3*P.NF+5:0] FmaShiftIn; // fma shift input + logic [3*P.NF+7:0] FmaShiftIn; // fma shift input logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma @@ -155,7 +155,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( case(PostProcSel) 2'b10: begin // fma ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt}; - ShiftIn = 
{FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+6){1'b0}}}; + ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+8){1'b0}}}; end 2'b00: begin // cvt ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt}; diff --git a/testbench/testbench_fp.sv b/testbench/testbench_fp.sv index 7c80afc62..6b772a918 100644 --- a/testbench/testbench_fp.sv +++ b/testbench/testbench_fp.sv @@ -98,8 +98,8 @@ module testbench_fp; logic [P.NE+1:0] Se; logic ASticky; logic KillProd; - logic [$clog2(3*P.NF+5)-1:0] SCnt; - logic [3*P.NF+3:0] Sm; + logic [$clog2(3*P.NF+7)-1:0] SCnt; + logic [3*P.NF+5:0] Sm; logic InvA; logic NegSum; logic As; @@ -974,8 +974,8 @@ module testbench_fp; if (~(ResMatch & FlagMatch) & CheckNow & (Ans[0] !== 1'bx)) begin errors += 1; $display("\nError in %s", Tests[TestNum]); - $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X[P.FLEN-1:0], Y[P.FLEN-1:0], Z[P.FLEN-1:0], SrcA, Res[P.FLEN-1:0], ResFlg, Ans[P.FLEN-1:0], AnsFlg); + $display("TestNum %d VectorNum %d OpCtrl %d", TestNum, VectorNum, OpCtrl[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; end