diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 8420baa1..7e240420 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -43,7 +43,6 @@ module divsqrt( input logic StallM, input logic StallE, output logic DivStickyM, - output logic DivNegStickyM, output logic DivBusy, output logic DivDone, output logic [`NE+1:0] DivCalcExpM, @@ -58,11 +57,12 @@ module divsqrt( logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; logic [`DURLEN-1:0] Dur; + logic NegSticky; srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 57b053da..039876e9 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -70,20 +70,21 @@ module fma( /////////////////////////////////////////////////////////////////////////////// // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - - align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, - .Am, .ZmSticky, .KillProd); - // calculate the signs and take the opperation into account sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, + .Ps, .As, .Am, .ZmSticky, .KillProd); + + + // /////////////////////////////////////////////////////////////////////////////// // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt); + loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -142,6 +143,7 @@ endmodule module align( + input logic As, Ps, input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero @@ -172,7 +174,7 @@ module align( // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; - assign KillProd = ACnt[`NE+1]|XZero|YZero; + assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); always_comb @@ -183,7 +185,7 @@ module align( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | if (KillProd) begin - ZmShifted = ZmPreshifted; + ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; ZmSticky = ~(XZero|YZero); // If the addend is too small to effect the addition @@ -221,6 +223,7 @@ module add( input logic [2*`NF+1:0] Pm, // the product's mantissa input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic KillProd, // should the product be set to 0 + input logic ZmSticky, input logic XZero, YZero, // is the input zero output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed @@ -243,13 +246,14 @@ module add( assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign PmKilled = Pm&{2*`NF+2{~KillProd}}; - - - // Do the addition // - calculate a positive and negitive sum in parallel - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)}; + // Zsticky Psticky + // PreSum -1 = don't add 1 +1 = add 2 + // NegPreSum +1 = add 2 -1 = don't add 1 + // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 + assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive assign NegSum = PreSum[3*`NF+6]; @@ -261,7 +265,7 @@ endmodule module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+1:0] P, // product + input logic [2*`NF+3:0] P, // product output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result ); @@ -273,12 +277,9 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; assign G[3*`NF+6:2*`NF+4] = 0; assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:2] = A[2*`NF+3:2]^P; - assign G[2*`NF+3:2] = A[2*`NF+3:2]&P; - assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P; - assign T[1:0] = A[1:0]; - assign G[1:0] = 0; - assign Z[1:0] = ~A[1:0]; + assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; + assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; + assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; // Apply function to determine Leading pattern diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 3c286b50..ae974eb0 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,7 +35,6 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - input logic ZDenorm, output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection @@ -54,7 +53,7 @@ module fmashiftcalc( // calculate the sum's exponent // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 - assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -149,9 +148,9 @@ module fmashiftcalc( // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; - assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift; + assign FmaShiftAmt = FmaNCnt+DenormShift; endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index bd018253..5428481d 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -127,7 +127,6 @@ module fpu ( //divide signals logic [`QLEN-1:0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; - logic DivNegStickyE, DivNegStickyM; logic DivStickyE, DivStickyM; logic DivDoneM; logic [`DURLEN-1:0] EarlyTermShiftM; @@ -288,7 +287,7 @@ module fpu ( // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), - .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal + .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, @@ -384,7 +383,7 @@ module fpu ( postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index 17db0c0b..eb9d3559 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -37,7 +37,6 @@ module lzacorrection( input logic [`NE+1:0] DivDenormShift, input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection - input logic FmaKillProd, // is the product set to zero input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction output logic [`NE+1:0] DivCorrExp, @@ -59,7 +58,7 @@ module lzacorrection( assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 30945532..d7fcb2a0 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -56,7 +56,6 @@ module postprocess ( //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivSticky, - input logic DivNegSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, input logic [`QLEN-1:0] Quot, @@ -153,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, - .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb @@ -183,7 +182,7 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp, + lzacorrection lzacorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); @@ -203,7 +202,7 @@ module postprocess ( round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, - .DivSticky, .DivNegSticky, .DivDone, + .DivSticky, .DivDone, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index c73edc08..e2b9cb3e 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -55,7 +55,6 @@ module round( input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE+1:0] DivCorrExp, // the calculated expoent input logic DivSticky, // sticky bit - input logic DivNegSticky, output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction @@ -67,7 +66,6 @@ module round( output logic R, UfLSBRes // bits needed to calculate rounding ); logic LSBRes; // bit used for rounding - least significant bit of the normalized sum - logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result logic NormSumSticky; // normalized sum's sticky bit logic UfSticky; // sticky bit for underlow calculation @@ -254,40 +252,25 @@ module round( assign S = UfSticky | UfRound; - // Deterimine if a small number was supposed to be subtrated - // - for FMA or if division has a negitive sticky bit - assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound); - assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky; - - always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even + 3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down - 3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up - 3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude + 3'b010: CalcPlus1 = Nsgn;//round down + 3'b011: CalcPlus1 = ~Nsgn;//round up + 3'b100: CalcPlus1 = R;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even + 3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up - 3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude + 3'b010: UfCalcPlus1 = Nsgn;//round down + 3'b011: UfCalcPlus1 = ~Nsgn;//round up + 3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase - // Determine if you subtract 1 - case (Frm) - 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero - 3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down - 3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up - 3'b100: CalcMinus1 = 0;//round to nearest max magnitude - default: CalcMinus1 = 1'bx; - endcase end @@ -295,26 +278,25 @@ module round( assign Plus1 = CalcPlus1 & (S | R); assign FpPlus1 = Plus1&~(ToInt&CvtOp); assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky - assign Minus1 = CalcMinus1 & (S | R); // Compute rounded result if (`FPSIZES == 1) begin - assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1}; + assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; end else if (`FPSIZES == 2) begin // \/FLEN+1 // | NE+2 | NF | // '-NE+2-^----NF1----^ // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} : - Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + assign RoundAdd = OutFmt ? {{{`FLEN{1'b0}}}, FpPlus1} : + {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; end else if (`FPSIZES == 3) begin always_comb begin case (OutFmt) - `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; - `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; + `FMT: RoundAdd = {{{`FLEN{1'b0}}}, FpPlus1}; + `FMT1: RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + `FMT2: RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; default: RoundAdd = (`FLEN+1)'(0); endcase end @@ -322,10 +304,10 @@ module round( end else if (`FPSIZES == 4) begin always_comb begin case (OutFmt) - 2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - 2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; - 2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; - 2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; + 2'h3: RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; + 2'h1: RoundAdd = {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; + 2'h0: RoundAdd = {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; + 2'h2: RoundAdd = {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; endcase end diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 5a7e96e2..b1bf6f56 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -40,6 +40,7 @@ module srtradix4( input logic [`DIVLEN-1:0] X, input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1:0] Quot, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] FirstWS, FirstWC, @@ -106,9 +107,9 @@ module srtradix4( // if starting a new divison set Q to 0 and QM to -1 mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); - flop #(`QLEN) QMreg(clk, QMMux, QM[0]); + flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Quot = Q[0]; + assign Quot = NegSticky ? QM[0] : Q[0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 21e35c36..481b1b22 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -44,7 +44,7 @@ module srtfsm( output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, output logic DivDone, - output logic DivNegStickyE, + output logic NegSticky, output logic DivBusy ); @@ -62,7 +62,7 @@ module srtfsm( assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; - assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? + assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? assign EarlyTermShiftE = step; always_ff @(posedge clk) begin diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 2aec1ab1..033045e7 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -681,7 +681,7 @@ module testbenchfp; postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), - .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky, + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, @@ -697,8 +697,8 @@ module testbenchfp; .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + .XInfE(XInf), .YInfE(YInf), .NegSticky(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .NegSticky(DivNegSticky), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; @@ -854,7 +854,7 @@ end // check if result is correct // - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);