From 1bcb1725f5c07453172911dfb3641d418bfdba15 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 6 Jan 2023 10:35:23 -0600 Subject: [PATCH 1/3] renamed alot of signals in fpu --- pipelined/src/fpu/fclassify.sv | 18 +-- pipelined/src/fpu/fcvt.sv | 20 ++-- pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv | 2 +- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 2 +- pipelined/src/fpu/fma/fmaadd.sv | 6 +- pipelined/src/fpu/fma/fmalza.sv | 8 +- pipelined/src/fpu/fpu.sv | 26 ++-- pipelined/src/fpu/postproc/cvtshiftcalc.sv | 6 +- pipelined/src/fpu/postproc/divshiftcalc.sv | 24 ++-- pipelined/src/fpu/postproc/flags.sv | 16 +-- pipelined/src/fpu/postproc/fmashiftcalc.sv | 39 +++--- pipelined/src/fpu/postproc/normshift.sv | 4 +- pipelined/src/fpu/postproc/postprocess.sv | 40 +++---- pipelined/src/fpu/postproc/resultsign.sv | 18 +-- pipelined/src/fpu/postproc/round.sv | 111 +++++++++--------- pipelined/src/fpu/postproc/shiftcorrection.sv | 28 ++--- pipelined/src/fpu/postproc/specialcase.sv | 66 +++++------ pipelined/src/fpu/unpack.sv | 8 +- pipelined/src/fpu/unpackinput.sv | 18 +-- 19 files changed, 229 insertions(+), 231 deletions(-) diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index 70049fcf..2c9698bc 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -33,38 +33,38 @@ module fclassify ( input logic Xs, // sign bit input logic XNaN, // is NaN input logic XSNaN, // is signaling NaN - input logic XDenorm,// is denormal + input logic XSubnorm,// is Subnormal input logic XZero, // is zero input logic XInf, // is infinity output logic [`XLEN-1:0] ClassRes// classify result ); - logic PInf, PZero, PNorm, PDenorm; - logic NInf, NZero, NNorm, NDenorm; + logic PInf, PZero, PNorm, PSubnorm; + logic NInf, NZero, NNorm, NSubnorm; logic XNorm; // determine the sub categories - assign XNorm= ~(XNaN | XInf| XDenorm| XZero); + assign XNorm= ~(XNaN | XInf| XSubnorm| XZero); assign PInf = ~Xs&XInf; assign NInf = Xs&XInf; 
assign PNorm = ~Xs&XNorm; assign NNorm = Xs&XNorm; - assign PDenorm = ~Xs&XDenorm; - assign NDenorm = Xs&XDenorm; + assign PSubnorm = ~Xs&XSubnorm; + assign NSubnorm = Xs&XSubnorm; assign PZero = ~Xs&XZero; assign NZero = Xs&XZero; // determine sub category and combine into the result // bit 0 - -Inf // bit 1 - -Norm - // bit 2 - -Denorm + // bit 2 - -Subnorm // bit 3 - -Zero // bit 4 - +Zero - // bit 5 - +Denorm + // bit 5 - +Subnorm // bit 6 - +Norm // bit 7 - +Inf // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; + assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf}; endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 6252eea5..64dd5d4a 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -39,11 +39,11 @@ module fcvt ( input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic ToInt, // is fp->int (since it's writting to the integer register) input logic XZero, // is the input zero - input logic XDenorm, // is the input denormalized + input logic XSubnorm, // is the input Subnormalized input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) output logic [`NE:0] Ce, // the calculated expoent output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by - output logic ResDenormUf,// does the result underflow or is denormalized + output logic ResSubnormUf, // does the result underflow or is Subnormalized output logic Cs, // the result's sign output logic IntZero, // is the integer zero? 
output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) @@ -165,7 +165,7 @@ module fcvt ( // calculate CalcExp // fp -> fp : // - XExp - Largest bias + new bias - (LeadingZeros+1) - // only do ^ if the input was denormalized + // only do ^ if the input was Subnormalized // - convert the expoenent to the final preciaion (Exp - oldBias + newBias) // - correct the expoent when there is a normalization shift ( + LeadingZeros+1) // - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction @@ -183,7 +183,7 @@ module fcvt ( // | 0's | Mantissa | 0's if nessisary | // | keep | // - // - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options + // - if the input is Subnormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options // int -> fp : largest bias + XLEN-1 - Largest bias + new bias - LeadingZeros = XLEN-1 + NewBias - LeadingZeros // Process: // |XLEN|.0000 @@ -200,7 +200,7 @@ module fcvt ( // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion - assign ResDenormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp; + assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp; /////////////////////////////////////////////////////////////////////////// @@ -211,17 +211,17 @@ module fcvt ( // select the amount to shift by // fp -> int: // - shift left by CalcExp - essentially shifting until the unbiased exponent = 0 - // - don't shift if supposed to shift right (underflowed or denorm input) - // denormalized/undeflowed result fp -> fp: + // - don't shift if supposed to shift right (underflowed or Subnorm input) + // Subnormalized/undeflowed result fp -> fp: // - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0 // ??? 
-> fp: // - shift left by LeadingZeros - to shift till the result is normalized - // - only shift fp -> fp if the intital value is denormalized + // - only shift fp -> fp if the intital value is Subnormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - always_comb//***change denorm to subnorm + always_comb if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}}; - else if (ResDenormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; + else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; else ShiftAmt = LeadingZeros; /////////////////////////////////////////////////////////////////////////// // sign diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index c7d8e989..fa650a2e 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -70,7 +70,7 @@ module fdivsqrtexpcalc( assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; - // correct exponent for denormalized input's normalization shifts + // correct exponent for Subnormalized input's normalization shifts assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; assign Qe = Sqrt ? 
SExp : DExp; endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 968d7cbc..2a56c5fc 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -147,7 +147,7 @@ module fdivsqrtpreproc ( assign X = PreShiftX; end - // count leading zeros for denorm FP and to normalize integer inputs + // count leading zeros for Subnorm FP and to normalize integer inputs lzc #(`DIVb) lzcX (IFNormLenX, ell); lzc #(`DIVb) lzcY (IFNormLenD, mE); diff --git a/pipelined/src/fpu/fma/fmaadd.sv b/pipelined/src/fpu/fma/fmaadd.sv index 7ce641db..46e6d5a8 100644 --- a/pipelined/src/fpu/fma/fmaadd.sv +++ b/pipelined/src/fpu/fma/fmaadd.sv @@ -34,7 +34,7 @@ module fmaadd( input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) input logic [2*`NF+1:0] Pm, // the product's mantissa input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations) - input logic InvA, // invert the aligned addend + input logic InvA, // invert the aligned addend input logic KillProd, // should the product be set to 0 input logic ASticky, input logic [`NE-1:0] Ze, @@ -55,9 +55,9 @@ module fmaadd( /////////////////////////////////////////////////////////////////////////////// // Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition - assign AmInv = InvA ? ~Am : Am; + assign AmInv = {3*`NF+4{InvA}}^Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = KillProd ? 
'0 : Pm; + assign PmKilled = {2*`NF+2{~KillProd}}&Pm; // Do the addition // - calculate a positive and negitive sum in parallel // if there was a small negitive number killed in the alignment stage one needs to be subtracted from the sum diff --git a/pipelined/src/fpu/fma/fmalza.sv b/pipelined/src/fpu/fma/fmalza.sv index 9a0de74c..6cfa5fde 100644 --- a/pipelined/src/fpu/fma/fmalza.sv +++ b/pipelined/src/fpu/fma/fmalza.sv @@ -39,23 +39,23 @@ module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and d ); logic [WIDTH:0] F; - logic [WIDTH-1:0] B, P, G, K; + logic [WIDTH-1:0] B, P, Guard, K; logic [WIDTH-1:0] Pp1, Gm1, Km1; assign B = {{(`NF+1){1'b0}}, Pm}; // Zero extend product assign P = A^B; - assign G = A&B; + assign Guard = A&B; assign K= ~A&~B; assign Pp1 = {sub, P[WIDTH-1:1]}; - assign Gm1 = {G[WIDTH-2:0], Cin}; + assign Gm1 = {Guard[WIDTH-2:0], Cin}; assign Km1 = {K[WIDTH-2:0], ~Cin}; // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit assign F[WIDTH] = ~sub&P[WIDTH-1]; - assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); + assign F[WIDTH-1:0] = (Pp1&(Guard&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | Guard&~Gm1)); lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt)); endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 1f749e9e..36748f47 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -101,7 +101,7 @@ module fpu ( logic XNaNQ, YNaNQ; // is the input a NaN - divide logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized + logic XSubnormE, ZSubnormE, ZSubnormM; // is the input Subnormalized logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage logic XInfE, 
YInfE, ZInfE; // is the input infinity - execute stage @@ -121,7 +121,7 @@ module fpu ( // Cvt Signals logic [`NE:0] CeE, CeM; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by - logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized + logic CvtResSubnormUfE, CvtResSubnormUfM;// does the result underflow or is Subnormalized logic CsE, CsM; // the result's sign logic IntZeroE, IntZeroM; // is the integer zero? logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) @@ -238,11 +238,11 @@ module fpu ( // unpack unit // - splits FP inputs into their various parts - // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) + // - does some classifications (SNaN, NaN, Subnorm, Norm, Zero, Infifnity) unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), - .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), + .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), .ZSubnorm(ZSubnormE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE)); @@ -284,14 +284,14 @@ module fpu ( // classify // - fclass - fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE), + fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); // convert // - fcvt.*.* fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), - .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE), - .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE), + .ToInt(FWriteIntE), .XZero(XZeroE), .XSubnorm(XSubnormE), .Fmt(FmtE), .Ce(CeE), + 
.ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); // data to be stored in memory - to IEU @@ -349,16 +349,16 @@ module fpu ( flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenr #(15) EMFpReg5 (clk, reset, ~StallUnpackedM, - {XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, - {XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); + {XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZSubnormE}, + {XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZSubnormM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, - {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE}, - {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM}); + {CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE}, + {CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM}); // BEGIN MEMORY STAGE @@ -377,8 +377,8 @@ module fpu ( postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */ - .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), 
.OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), - .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), + .ZSubnorm(ZSubnormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), + .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged diff --git a/pipelined/src/fpu/postproc/cvtshiftcalc.sv b/pipelined/src/fpu/postproc/cvtshiftcalc.sv index a6ba456e..4b3f2d79 100644 --- a/pipelined/src/fpu/postproc/cvtshiftcalc.sv +++ b/pipelined/src/fpu/postproc/cvtshiftcalc.sv @@ -37,7 +37,7 @@ module cvtshiftcalc( input logic [`NF:0] Xm, // input mantissas input logic [`FMTBITS-1:0] OutFmt, // output format input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) - input logic CvtResDenormUf, // is the conversion result subnormal or underlows + input logic CvtResSubnormUf, // is the conversion result subnormal or underlows output logic CvtResUf, // does the cvt result unerflow output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted ); @@ -57,7 +57,7 @@ module cvtshiftcalc( // - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1 // - ex: for the case 0010000.... (double) // ??? -> fp: - // - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left: + // - if result is Subnormalized or underflowed then we want to shift right i.e. shift right then shift left: // | `NF-1 zeros | Mantissa | 0's if nessisary | // . 
// - otherwise: @@ -68,7 +68,7 @@ module cvtshiftcalc( always_comb // get rid of round bit if needed // | add sticky bit if needed if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}}; - else if (CvtResDenormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}}; + else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}}; else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}}; // choose the negative of the fraction size diff --git a/pipelined/src/fpu/postproc/divshiftcalc.sv b/pipelined/src/fpu/postproc/divshiftcalc.sv index 0f0e2d36..f53653dd 100644 --- a/pipelined/src/fpu/postproc/divshiftcalc.sv +++ b/pipelined/src/fpu/postproc/divshiftcalc.sv @@ -36,24 +36,24 @@ module divshiftcalc( input logic [`NE+1:0] DivQe, output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, - output logic DivResDenorm, - output logic DivDenormShiftPos + output logic DivResSubnorm, + output logic DivSubnormShiftPos ); - logic [`LOGNORMSHIFTSZ-1:0] NormShift, DivDenormShiftAmt; - logic [`NE+1:0] DivDenormShift; + logic [`LOGNORMSHIFTSZ-1:0] NormShift, DivSubnormShiftAmt; + logic [`NE+1:0] DivSubnormShift; - // is the result denormalized - // if the exponent is 1 then the result needs to be normalized then the result is denormalizes - assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); + // is the result subnormal + // if the exponent is 1 then the result needs to be normalized then the result is subnormal + assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); - // if the result is denormalized + // if the result is subnormal // 00000000x.xxxxxx... Exp = DivQe // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... 
>> 1 Exp = 1 // Left shift amount = DivQe+NF+1-1 - assign DivDenormShift = (`NE+2)'(`NF)+DivQe; - assign DivDenormShiftPos = ~DivDenormShift[`NE+1]; + assign DivSubnormShift = (`NE+2)'(`NF)+DivQe; + assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1]; // if the result is normalized // 00000000x.xxxxxx... Exp = DivQe @@ -67,8 +67,8 @@ module divshiftcalc( // if the shift amount is negitive then don't shift (keep sticky bit) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivDenormShiftAmt = DivDenormShiftPos ? DivDenormShift[`LOGNORMSHIFTSZ-1:0] : '0; - assign DivShiftAmt = DivResDenorm ? DivDenormShiftAmt : NormShift; + assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0; + assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/postproc/flags.sv b/pipelined/src/fpu/postproc/flags.sv index c56bc651..83c1fdc6 100644 --- a/pipelined/src/fpu/postproc/flags.sv +++ b/pipelined/src/fpu/postproc/flags.sv @@ -51,7 +51,7 @@ module flags( input logic [`NE+1:0] Me, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs - input logic R, G, S, UfPlus1, // bits used to determine rounding + input logic Round, Guard, Sticky, UfPlus1, // bits used to determine rounding output logic DivByZero, output logic IntInvalid, Invalid, Overflow, // flags used to select the res output logic [4:0] PostProcFlg // flags @@ -121,24 +121,24 @@ module flags( // detecting tininess after rounding // the exponent is negitive - // | the result is denormalized - // | | the result is normal and rounded from a denorm + // | the result is Subnormalized + // | | the result is normal and rounded from a Subnorm // | | | and if given an unbounded exponent 
the result does not round // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid); - //assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid|XZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid); + //assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid|XZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid); - //assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid|XZero); + assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid); + //assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) // | | - assign IntInexact = ((CvtCe[`NE]&~XZero)|S|R|G)&~IntInvalid; + assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid; // select the inexact flag to output assign Inexact = ToInt ? 
IntInexact : FpInexact; diff --git a/pipelined/src/fpu/postproc/fmashiftcalc.sv b/pipelined/src/fpu/postproc/fmashiftcalc.sv index 1110b70f..e491b606 100644 --- a/pipelined/src/fpu/postproc/fmashiftcalc.sv +++ b/pipelined/src/fpu/postproc/fmashiftcalc.sv @@ -34,9 +34,9 @@ module fmashiftcalc( input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic [`NE+1:0] FmaSe, // sum's exponent - output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - output logic FmaSZero, // is the result denormalized - calculated before LZA corection - output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection + output logic [`NE+1:0] NormSumExp, //*** add fma // exponent of the normalized sum not taking into account subnormal or zero results + output logic FmaSZero, // is the sum zero + output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+5:0] FmaShiftIn // is the sum zero ); @@ -46,7 +46,6 @@ module fmashiftcalc( /////////////////////////////////////////////////////////////////////////////// // Normalization /////////////////////////////////////////////////////////////////////////////// - //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent @@ -84,13 +83,13 @@ module fmashiftcalc( end - // determine if the result is denormalized + // determine if the result is Subnormalized if (`FPSIZES == 1) begin logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; 
+ assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; @@ -98,7 +97,7 @@ module fmashiftcalc( assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; - assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; + assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; @@ -110,10 +109,10 @@ module fmashiftcalc( assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp; always_comb begin case (Fmt) - `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; - `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; - default: FmaPreResultDenorm = 1'bx; + `FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + `FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + `FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + default: FmaPreResultSubnorm = 1'bx; endcase end @@ -129,10 +128,10 @@ module fmashiftcalc( assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp; always_comb begin case (Fmt) - 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; - 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; - 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; + 2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + 2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + 2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + 2'h2: FmaPreResultSubnorm = 
Sum3LEZ & Sum3GEFL & ~FmaSZero; endcase end @@ -143,14 +142,14 @@ module fmashiftcalc( // - add one from exp // - if kill prod dont add to exp - // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero; + // Determine if the result is Subnormal + // assign FmaPreResultSubnorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero; // set and calculate the shift input and amount - // - shift once if killing a product and the result is denormalized + // - shift once if killing a product and the result is Subnormalized assign FmaShiftIn = {2'b0, FmaSm}; if (`FPSIZES == 1) - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1; + assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1; else - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1; + assign FmaShiftAmt = FmaPreResultSubnorm ? 
FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1; endmodule diff --git a/pipelined/src/fpu/postproc/normshift.sv b/pipelined/src/fpu/postproc/normshift.sv index 0d23d2f7..c278988d 100644 --- a/pipelined/src/fpu/postproc/normshift.sv +++ b/pipelined/src/fpu/postproc/normshift.sv @@ -45,7 +45,7 @@ // | keep | // // fp -> fp: - // - if result is denormalized or underflowed: + // - if result is Subnormalized or underflowed: // | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1 // process: // - start @@ -58,7 +58,7 @@ // | 0's | mantissa | 0's | // | keep | // - // - if the input is denormalized: + // - if the input is Subnormalized: // | lzcIn | 0's if nessisary | << ZeroCnt+1 // - plus 1 to shift out the first 1 // diff --git a/pipelined/src/fpu/postproc/postprocess.sv b/pipelined/src/fpu/postproc/postprocess.sv index 4637f370..8f6f283c 100644 --- a/pipelined/src/fpu/postproc/postprocess.sv +++ b/pipelined/src/fpu/postproc/postprocess.sv @@ -41,7 +41,7 @@ module postprocess ( input logic XInf, YInf, ZInf, // inputs are infinity input logic XNaN, YNaN, ZNaN, // inputs are NaN input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs - input logic ZDenorm, // is the original precision denormalized + input logic ZSubnorm, // is the original precision Subnormalized input logic [1:0] PostProcSel, // select result to be written to fp register //fma signals input logic FmaAs, // the modified Z sign - depends on instruction @@ -58,7 +58,7 @@ module postprocess ( // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent - input logic CvtResDenormUf, + input logic CvtResSubnormUf, input logic [`LOGCVTLEN-1:0] CvtShiftAmt, // how much to shift by input logic ToInt, // is fp->int (since it's writting to the integer register) input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) @@ -70,7 +70,7 @@ module postprocess ( ); // 
general signals - logic Ws; + logic Rs; logic [`NF-1:0] Rf; // Result fraction logic [`NE-1:0] Re; // Result exponent logic Ms; @@ -83,22 +83,22 @@ module postprocess ( logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? logic IntInvalid, Overflow, Invalid; // flags - logic G, R, S; // bits needed to determine rounding + logic Guard, Round, Sticky; // bits needed to determine rounding logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero logic [3*`NF+5:0] FmaShiftIn; // shift input - logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account denormal or zero results - logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection + logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results + logic FmaPreResultSubnorm; // is the result Subnormalized - calculated before LZA corection logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt; // normalization shift count // division singals logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] DivShiftIn; logic [`NE+1:0] Qe; logic DivByZero; - logic DivResDenorm; - logic DivDenormShiftPos; + logic DivResSubnorm; + logic DivSubnormShiftPos; // conversion signals logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted logic [1:0] CvtNegResMsbs; @@ -142,11 +142,11 @@ module postprocess ( // Normalization /////////////////////////////////////////////////////////////////////////////// - cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, + cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, - .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Sqrt, .DivQe, .DivQm, .DivResDenorm, 
.DivDenormShiftPos, .DivShiftAmt, .DivShiftIn); + .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Sqrt, .DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -175,8 +175,8 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .NormSumExp, - .DivResDenorm, .DivDenormShiftPos, .DivOp, .DivQe, + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, + .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); /////////////////////////////////////////////////////////////////////////////// @@ -193,16 +193,16 @@ module postprocess ( roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, - .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, + .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf, .DivS, //.DivDone, - .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me); + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.Frm, .FmaPs, .FmaAs, .R, .S, .G, - .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); + resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard, + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs); /////////////////////////////////////////////////////////////////////////////// // Flags @@ -210,8 +210,8 @@ module postprocess ( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, - .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .G, .S, .UfPlus1, .CvtOp, 
.DivOp, .FmaOp, .FullRe, .Plus1, + .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero, + .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// @@ -223,6 +223,6 @@ module postprocess ( .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, - .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes); + .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/pipelined/src/fpu/postproc/resultsign.sv b/pipelined/src/fpu/postproc/resultsign.sv index 2e1a3cf0..214c0161 100644 --- a/pipelined/src/fpu/postproc/resultsign.sv +++ b/pipelined/src/fpu/postproc/resultsign.sv @@ -37,11 +37,11 @@ module resultsign( input logic FmaOp, input logic FmaSZero, input logic Mult, - input logic R, - input logic S, - input logic G, + input logic Round, + input logic Sticky, + input logic Guard, input logic Ms, - output logic Ws + output logic Rs ); logic Zeros; @@ -59,9 +59,9 @@ module resultsign( // - Z is killed and P is zero - impossible // Zero sign calculation: // - if a multiply opperation is done, then use the products sign(Ps) - // - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign) + // - if the zero sum is not exactly zero i.e. Round|Sticky use the sign of the exact result (which is the product's sign) // - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign - assign Zeros = (FmaPs^FmaAs)&~(R|G|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + assign Zeros = (FmaPs^FmaAs)&~(Round|Guard|Sticky)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive @@ -70,8 +70,8 @@ module resultsign( // if -p - z then the Sum is negitive assign Infs = ZInf ? 
FmaAs : FmaPs; always_comb - if(InfIn&FmaOp) Ws = Infs; - else if(FmaSZero&FmaOp) Ws = Zeros; - else Ws = Ms; + if(InfIn&FmaOp) Rs = Infs; + else if(FmaSZero&FmaOp) Rs = Zeros; + else Rs = Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/round.sv b/pipelined/src/fpu/postproc/round.sv index b24884db..0d6395e7 100644 --- a/pipelined/src/fpu/postproc/round.sv +++ b/pipelined/src/fpu/postproc/round.sv @@ -37,15 +37,14 @@ `define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3) module round( - input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single - input logic [2:0] Frm, // rounding mode - input logic FmaOp, - input logic DivOp, - input logic CvtOp, - input logic ToInt, -// input logic DivDone, - input logic [1:0] PostProcSel, - input logic CvtResDenormUf, + input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single + input logic [2:0] Frm, // rounding mode + input logic FmaOp, // is an fma opperation being done? + input logic DivOp, // is a division opperation being done + input logic CvtOp, // is a convert opperation being done + input logic ToInt, // is the cvt op a cvt to integer + input logic [1:0] PostProcSel, // select the postprocessor output + input logic CvtResSubnormUf, // is the cvt result subnormal or underflow input logic CvtResUf, input logic [`CORRSHIFTSZ-1:0] Mf, input logic FmaASticky, // addend's sticky bit @@ -58,17 +57,17 @@ module round( output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction output logic [`NE-1:0] Re, // Result exponent - output logic S, // sticky bit + output logic Sticky, // sticky bit output logic [`NE+1:0] Me, output logic Plus1, - output logic R, G // bits needed to calculate rounding + output logic Round, Guard // bits needed to calculate rounding ); logic UfCalcPlus1; logic NormS; // normalized sum's sticky bit logic [`NF-1:0] RoundFrac; logic FpRes, IntRes; - logic FpG, FpL, FpR; - logic L; // lsb 
of result + logic FpGuard, FpLsbRes, FpRound; + logic LsbRes; // lsb of result logic CalcPlus1, FpPlus1; logic [`FLEN:0] RoundAdd; // how much to add to the result @@ -77,7 +76,7 @@ module round( /////////////////////////////////////////////////////////////////////////////// // round to nearest even - // {R, S} + // {Round, Sticky} // 0x - do nothing // 10 - tie - Plus1 if result is odd (LSBNormSum = 1) // - don't add 1 if a small number was supposed to be subtracted @@ -95,7 +94,7 @@ module round( // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 // round to nearest max magnitude - // {Guard, R, S} + // {Guard, Round, Sticky} // 0x - do nothing // 10 - tie - Plus1 // - don't add 1 if a small number was supposed to be subtracted @@ -175,101 +174,101 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign S = FmaASticky&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; + assign Sticky = FmaASticky&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (`FPSIZES == 1) begin - assign FpG = Mf[`CORRSHIFTSZ-`NF-1]; - assign FpL = Mf[`CORRSHIFTSZ-`NF]; - assign FpR = Mf[`CORRSHIFTSZ-`NF-2]; + assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; + assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; + assign FpRound = Mf[`CORRSHIFTSZ-`NF-2]; end else if (`FPSIZES == 2) begin - assign FpG = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; - assign FpL = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; - assign FpR = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; + assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; + assign FpLsbRes = OutFmt ? 
Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; + assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; end else if (`FPSIZES == 3) begin always_comb case (OutFmt) `FMT: begin - FpG = Mf[`CORRSHIFTSZ-`NF-1]; - FpL = Mf[`CORRSHIFTSZ-`NF]; - FpR = Mf[`CORRSHIFTSZ-`NF-2]; + FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; + FpRound = Mf[`CORRSHIFTSZ-`NF-2]; end `FMT1: begin - FpG = Mf[`CORRSHIFTSZ-`NF1-1]; - FpL = Mf[`CORRSHIFTSZ-`NF1]; - FpR = Mf[`CORRSHIFTSZ-`NF1-2]; + FpGuard = Mf[`CORRSHIFTSZ-`NF1-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`NF1]; + FpRound = Mf[`CORRSHIFTSZ-`NF1-2]; end `FMT2: begin - FpG = Mf[`CORRSHIFTSZ-`NF2-1]; - FpL = Mf[`CORRSHIFTSZ-`NF2]; - FpR = Mf[`CORRSHIFTSZ-`NF2-2]; + FpGuard = Mf[`CORRSHIFTSZ-`NF2-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`NF2]; + FpRound = Mf[`CORRSHIFTSZ-`NF2-2]; end default: begin - FpG = 1'bx; - FpL = 1'bx; - FpR = 1'bx; + FpGuard = 1'bx; + FpLsbRes = 1'bx; + FpRound = 1'bx; end endcase end else if (`FPSIZES == 4) begin always_comb case (OutFmt) 2'h3: begin - FpG = Mf[`CORRSHIFTSZ-`Q_NF-1]; - FpL = Mf[`CORRSHIFTSZ-`Q_NF]; - FpR = Mf[`CORRSHIFTSZ-`Q_NF-2]; + FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF]; + FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; end 2'h1: begin - FpG = Mf[`CORRSHIFTSZ-`D_NF-1]; - FpL = Mf[`CORRSHIFTSZ-`D_NF]; - FpR = Mf[`CORRSHIFTSZ-`D_NF-2]; + FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF]; + FpRound = Mf[`CORRSHIFTSZ-`D_NF-2]; end 2'h0: begin - FpG = Mf[`CORRSHIFTSZ-`S_NF-1]; - FpL = Mf[`CORRSHIFTSZ-`S_NF]; - FpR = Mf[`CORRSHIFTSZ-`S_NF-2]; + FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF]; + FpRound = Mf[`CORRSHIFTSZ-`S_NF-2]; end 2'h2: begin - FpG = Mf[`CORRSHIFTSZ-`H_NF-1]; - FpL = Mf[`CORRSHIFTSZ-`H_NF]; - FpR = Mf[`CORRSHIFTSZ-`H_NF-2]; + FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF]; + FpRound = Mf[`CORRSHIFTSZ-`H_NF-2]; end endcase end - assign G = ToInt&CvtOp ? 
Mf[`CORRSHIFTSZ-`XLEN-1] : FpG; - assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpL; - assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpR; + assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard; + assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes; + assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound; always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = G & (R|S|L);//round to nearest even + 3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero 3'b010: CalcPlus1 = Ms;//round down 3'b011: CalcPlus1 = ~Ms;//round up - 3'b100: CalcPlus1 = G;//round to nearest max magnitude + 3'b100: CalcPlus1 = Guard;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = R & (S|G);//round to nearest even + 3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero 3'b010: UfCalcPlus1 = Ms;//round down 3'b011: UfCalcPlus1 = ~Ms;//round up - 3'b100: UfCalcPlus1 = R;//round to nearest max magnitude + 3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase end // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (S|R|G); + assign Plus1 = CalcPlus1 & (Sticky|Round|Guard); assign FpPlus1 = Plus1&~(ToInt&CvtOp); - assign UfPlus1 = UfCalcPlus1 & (S|R); + assign UfPlus1 = UfCalcPlus1 & (Sticky|Round); // Compute rounded result if (`FPSIZES == 1) begin @@ -294,7 +293,7 @@ module round( always_comb case(PostProcSel) 2'b10: Me = FmaMe; // fma - 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt + 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt // 2'b01: Me = DivDone ? 
Qe : '0; // divide 2'b01: Me = Qe; // divide default: Me = '0; diff --git a/pipelined/src/fpu/postproc/shiftcorrection.sv b/pipelined/src/fpu/postproc/shiftcorrection.sv index 17218046..9e16d919 100644 --- a/pipelined/src/fpu/postproc/shiftcorrection.sv +++ b/pipelined/src/fpu/postproc/shiftcorrection.sv @@ -33,11 +33,11 @@ module shiftcorrection( input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction input logic FmaOp, input logic DivOp, - input logic DivResDenorm, + input logic DivResSubnorm, input logic [`NE+1:0] DivQe, - input logic DivDenormShiftPos, - input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection + input logic DivSubnormShiftPos, + input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results + input logic FmaPreResultSubnorm, // is the result Subnormalized - calculated before LZA corection input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction output logic [`NE+1:0] Qe, @@ -45,27 +45,27 @@ module shiftcorrection( ); logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction logic [`CORRSHIFTSZ-1:0] CorrQmShifted; - logic ResDenorm; // is the result denormalized + logic ResSubnorm; // is the result Subnormalized logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction // LZA correction assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? 
Shifted[`NORMSHIFTSZ-2:1] : Shifted[`NORMSHIFTSZ-3:0]; - // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) + // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm) assign CorrQmShifted = (LZAPlus1|(DivQe==1&~LZAPlus1)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; - // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits + // if the result of the divider was calculated to be Subnormalized, then the result was correctly normalized, so select the top shifted bits always_comb if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}}; - else if (DivOp&~DivResDenorm) Mf = CorrQmShifted; + else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted; else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent - // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}) & {`NE+2{~(FmaSZero|ResDenorm)}}; - // recalculate if the result is denormalized - assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1]; + // if plus1 If plus2 if said Subnorm but norm plus 1 if said Subnorm but norm plus 2 + assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, ~ResSubnorm&FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}}; + // recalculate if the result is Subnormalized + assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1]; // the quotent is in the range [.5,2) if there is no early termination - // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift - assign Qe = (DivResDenorm & DivDenormShiftPos) ? 
'0 : DivQe - {(`NE+1)'(0), ~LZAPlus1}; + // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift + assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/specialcase.sv b/pipelined/src/fpu/postproc/specialcase.sv index 19d60ba7..eb198c42 100644 --- a/pipelined/src/fpu/postproc/specialcase.sv +++ b/pipelined/src/fpu/postproc/specialcase.sv @@ -50,7 +50,7 @@ module specialcase( input logic Plus1, input logic DivByZero, input logic [`NE:0] CvtCe, // the calculated expoent - input logic Ws, // the res's sign + input logic Rs, // the res's sign input logic IntInvalid, Invalid, Overflow, // flags input logic CvtResUf, input logic [`NE-1:0] Re, // Res exponent @@ -69,7 +69,7 @@ module specialcase( // does the overflow result output the maximum normalized floating point number // output infinity if the input is infinity - assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws)); + assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs)); if (`FPSIZES == 1) begin @@ -83,9 +83,9 @@ module specialcase( assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end - assign OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = {Ws, Re, Rf}; + assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = {Rs, Re, Rf}; end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? 
if(`IEEE754) begin @@ -99,13 +99,13 @@ module specialcase( always_comb if(OutFmt) - if(OfResMax) OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; - else OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; + else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; else - if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; - else OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; + else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; + assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; end else if (`FPSIZES == 3) begin always_comb @@ -120,9 +120,9 @@ module specialcase( InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; + OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Rs, Re, Rf}; end `FMT1: begin if(`IEEE754) begin @@ -133,9 +133,9 @@ module specialcase( end else begin InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end - OfRes = OfResMax ? 
{{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; + UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; end `FMT2: begin if(`IEEE754) begin @@ -147,9 +147,9 @@ module specialcase( InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; + OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)}; + UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; end default: begin if(`IEEE754) begin @@ -179,9 +179,9 @@ module specialcase( InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; end - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; + OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Rs, Re, Rf}; end 2'h1: begin if(`IEEE754) begin @@ -192,9 +192,9 @@ module specialcase( end else begin InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; end - OfRes = OfResMax ? 
{{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; + OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)}; + UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; end 2'h0: begin if(`IEEE754) begin @@ -206,9 +206,9 @@ module specialcase( InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; + OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)}; + UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; end 2'h2: begin if(`IEEE754) begin @@ -220,10 +220,10 @@ module specialcase( InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; end - OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)}; + OfRes = OfResMax ? 
{{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)}; // zero is exact fi dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; + UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; end endcase @@ -237,7 +237,7 @@ module specialcase( // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 // - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); + assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1); assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); // output infinity with result sign if divide by zero if(`IEEE754) diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 8444a2c6..850837e2 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -38,7 +38,7 @@ module unpack ( output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) output logic XNaN, YNaN, ZNaN, // is XYZ a NaN output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN - output logic XDenorm, ZDenorm, // is XYZ denormalized + output logic XSubnorm, ZSubnorm, // is XYZ Subnormalized output logic XZero, YZero, ZZero, // is XYZ zero output logic XInf, YInf, ZInf, // is XYZ infinity output logic XExpMax // does X have the maximum exponent (NaN or Inf) @@ -59,7 +59,7 @@ module unpack ( unpackinput unpackinputZ (.In(Z), .Fmt, 
.Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); - // is the input denormalized - assign XDenorm = ~XExpNonZero & ~XFracZero; - assign ZDenorm = ~ZExpNonZero & ~ZFracZero; + // is the input Subnormalized + assign XSubnorm = ~XExpNonZero & ~XFracZero; + assign ZSubnorm = ~ZExpNonZero & ~ZFracZero; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index a66f8e61..07f1b299 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -54,7 +54,7 @@ module unpackinput ( assign Sgn = In[`FLEN-1]; // sign bit assign Frac = In[`NF-1:0]; // fraction (no assumed 1) assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero - assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. Denormalized numbers have effective biased exponent of 1 + assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. 
Subnormalized numbers have effective biased exponent of 1 assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported //***need better names for these constants @@ -64,7 +64,7 @@ module unpackinput ( // `NE | `NE1 length of exponent // `NF | `NF1 length of fraction // `BIAS | `BIAS1 exponent's bias value - // `FMT | `FMT1 precision's format value - Q=11 D=01 S=00 H=10 + // `FMT | `FMT1 precision's format value - Q=11 D=01 S=00 H=10 // Possible combinantions specified by spec: // double and single @@ -93,10 +93,10 @@ module unpackinput ( // 896 = 0011 1000 0000 // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values + // also need to take into account possible zero/Subnorm/inf/NaN values // extract the exponent, converting the smaller exponent into the larger precision if nessisary - // - if the original precision had a denormal number convert the exponent value 1 + // - if the original precision had a Subnormal number convert the exponent value 1 assign Exp = Fmt ? 
{In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; // is the exponent all 1's @@ -112,7 +112,7 @@ module unpackinput ( // `NE | `NE1 | `NE2 length of exponent // `NF | `NF1 | `NF2 length of fraction // `BIAS | `BIAS1 | `BIAS2 exponent's bias value - // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10 + // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10 // Possible combinantions specified by spec: // quad and double and single @@ -164,7 +164,7 @@ module unpackinput ( // 896 = 0011 1000 0000 // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values + // also need to take into account possible zero/Subnorm/inf/NaN values // convert the larger precision's exponent to use the largest precision's bias always_comb @@ -192,7 +192,7 @@ module unpackinput ( // `Q_NE | `D_NE | `S_NE | `H_NE length of exponent // `Q_NF | `D_NF | `S_NF | `H_NF length of fraction // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value - // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10 + // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10 // Check NaN boxing always_comb @@ -238,7 +238,7 @@ module unpackinput ( // 896 = 0011 1000 0000 // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values + // also need to take into account possible zero/Subnorm/inf/NaN values // convert the double precsion exponent into quad precsion // 1 is added to the exponent if the input is zero or subnormal @@ -264,7 +264,7 @@ module unpackinput ( // Output logic assign FracZero = ~|Frac; // is the fraction zero? 
- assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand + assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? assign Inf = ExpMax & FracZero &En; // is the input infinity? From 7223d1e05c50e96f79238ebb8fe6ad1cb7b87bba Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 6 Jan 2023 15:15:54 -0600 Subject: [PATCH 2/3] Added python script to post process performance counter metrics. --- bin/parseHPMC.py | 108 +++++++++++++++++++++++++++++++ pipelined/testbench/testbench.sv | 4 +- 2 files changed, 110 insertions(+), 2 deletions(-) create mode 100755 bin/parseHPMC.py diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py new file mode 100755 index 00000000..8830adf6 --- /dev/null +++ b/bin/parseHPMC.py @@ -0,0 +1,108 @@ +#!/usr/bin/python3 + +import os +import sys + +def ComputeCPI(benchmark): + 'Computes and inserts CPI into benchmark stats.' + (nameString, opt, dataDict) = benchmark + CPI = 1.0 * int(dataDict['Mcycle']) / int(dataDict['InstRet']) + dataDict['CPI'] = CPI + +def ComputeBranchDirMissRate(benchmark): + 'Computes and inserts branch direction miss prediction rate.' + (nameString, opt, dataDict) = benchmark + branchDirMissRate = 100.0 * int(dataDict['Br Dir Wrong']) / int(dataDict['Br Count']) + dataDict['BDMR'] = branchDirMissRate + +def ComputeBranchTargetMissRate(benchmark): + 'Computes and inserts branch target miss prediction rate.' 
+ # *** this is wrong in the verilog test bench + (nameString, opt, dataDict) = benchmark + branchTargetMissRate = 100.0 * int(dataDict['Br Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump, JR, ret']) + int(dataDict['ret'])) + dataDict['BTMR'] = branchTargetMissRate + +def ComputeRASMissRate(benchmark): + 'Computes and inserts return address stack miss prediction rate.' + (nameString, opt, dataDict) = benchmark + RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['ret']) + dataDict['RASMPR'] = RASMPR + +def ComputeInstrClassMissRate(benchmark): + 'Computes and inserts instruction class miss prediction rate.' + (nameString, opt, dataDict) = benchmark + ClassMPR = 100.0 * int(dataDict['Instr Class Wrong']) / int(dataDict['InstRet']) + dataDict['ClassMPR'] = ClassMPR + +def ComputeICacheMissRate(benchmark): + 'Computes and inserts instruction cache miss rate.' + (nameString, opt, dataDict) = benchmark + ICacheMR = 100.0 * int(dataDict['I Cache Miss']) / int(dataDict['I Cache Access']) + dataDict['ICacheMR'] = ICacheMR + +def ComputeDCacheMissRate(benchmark): + 'Computes and inserts data cache miss rate.' 
+ (nameString, opt, dataDict) = benchmark + DCacheMR = 100.0 * int(dataDict['D Cache Miss']) / int(dataDict['D Cache Access']) + dataDict['DCacheMR'] = DCacheMR + + +def printStats(benchmark): + (nameString, opt, dataDict) = benchmark + CPI = dataDict['CPI'] + BDMR = dataDict['BDMR'] + BTMR = dataDict['BTMR'] + RASMPR = dataDict['RASMPR'] + print('Test', nameString) + print('Compile configuration', opt) + print('CPI \t\t\t %1.2f' % CPI) + print('Branch Dir Pred Miss Rate %2.2f' % BDMR) + print('Branch Target Pred Miss Rate %2.2f' % BTMR) + print('RAS Miss Rate \t\t %1.2f' % RASMPR) + print('Instr Class Miss Rate %1.2f' % dataDict['ClassMPR']) + print('I Cache Miss Rate %1.4f' % dataDict['ICacheMR']) + print('D Cache Miss Rate %1.4f' % dataDict['DCacheMR']) + print() + + +# 1 find lines with Read memfile and extract test name +# 2 parse counters into a list of (name, value) tuples (dictionary maybe?) +# 3 process into useful data + # cache hit rates + # cache fill time + # branch predictor status + # hazard counts + # CPI + # instruction distribution + +# steps 1 and 2 +benchmarks = [] +transcript = open(sys.argv[1], 'r') +HPMClist = { } +testName = '' +for line in transcript.readlines(): + lineToken = line.split() + if(len(lineToken) > 3 and lineToken[1] == 'Read' and lineToken[2] == 'memfile'): + opt = lineToken[3].split('/')[-4] + testName = lineToken[3].split('/')[-1].split('.')[0] + HPMClist = { } + elif(len(lineToken) > 4 and lineToken[1][0:3] == 'Cnt'): + countToken = line.split('=')[1].split() + value = countToken[0] + name = ' '.join(countToken[1:]) + HPMClist[name] = value + elif ('is done' in line): + benchmarks.append((testName, opt, HPMClist)) + +#print(benchmarks[0]) + +for benchmark in benchmarks: + ComputeCPI(benchmark) + ComputeBranchDirMissRate(benchmark) + ComputeBranchTargetMissRate(benchmark) + ComputeRASMissRate(benchmark) + ComputeInstrClassMissRate(benchmark) + ComputeICacheMissRate(benchmark) + ComputeDCacheMissRate(benchmark) + 
printStats(benchmark) + diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 48cd58c8..e847e839 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -32,7 +32,7 @@ `include "wally-config.vh" `include "tests.vh" -`define PrintHPMCounters 0 +`define PrintHPMCounters 1 module testbench; parameter DEBUG=0; @@ -413,7 +413,7 @@ logic [3:0] dummy; "Br Dir Wrong", "Br Count", "Br Target Wrong", - "Jump, JR, ret", + "Jump, JR, Jal", "RAS Wrong", "ret", "Instr Class Wrong", From c1c4024b4bc65ffa476df2196532c5ae3e63ee59 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 6 Jan 2023 18:04:49 -0600 Subject: [PATCH 3/3] Fancy plot for branch predictor. --- bin/parseHPMC.py | 133 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 38 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 8830adf6..3e4132e0 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -2,6 +2,7 @@ import os import sys +import matplotlib.pyplot as plt def ComputeCPI(benchmark): 'Computes and inserts CPI into benchmark stats.' @@ -45,7 +46,16 @@ def ComputeDCacheMissRate(benchmark): (nameString, opt, dataDict) = benchmark DCacheMR = 100.0 * int(dataDict['D Cache Miss']) / int(dataDict['D Cache Access']) dataDict['DCacheMR'] = DCacheMR - + +def ComputeAll(benchmarks): + for benchmark in benchmarks: + ComputeCPI(benchmark) + ComputeBranchDirMissRate(benchmark) + ComputeBranchTargetMissRate(benchmark) + ComputeRASMissRate(benchmark) + ComputeInstrClassMissRate(benchmark) + ComputeICacheMissRate(benchmark) + ComputeDCacheMissRate(benchmark) def printStats(benchmark): (nameString, opt, dataDict) = benchmark @@ -64,45 +74,92 @@ def printStats(benchmark): print('D Cache Miss Rate %1.4f' % dataDict['DCacheMR']) print() +def ProcessFile(fileName): + '''Extract preformance counters from a modelsim log. Outputs a list of tuples for each test/benchmark. 
+ The tuple contains the test name, optimization characteristics, and dictionary of performance counters.''' + # 1 find lines with Read memfile and extract test name + # 2 parse counters into a list of (name, value) tuples (dictionary maybe?) + benchmarks = [] + transcript = open(fileName, 'r') + HPMClist = { } + testName = '' + for line in transcript.readlines(): + lineToken = line.split() + if(len(lineToken) > 3 and lineToken[1] == 'Read' and lineToken[2] == 'memfile'): + opt = lineToken[3].split('/')[-4] + testName = lineToken[3].split('/')[-1].split('.')[0] + HPMClist = { } + elif(len(lineToken) > 4 and lineToken[1][0:3] == 'Cnt'): + countToken = line.split('=')[1].split() + value = countToken[0] + name = ' '.join(countToken[1:]) + HPMClist[name] = value + elif ('is done' in line): + benchmarks.append((testName, opt, HPMClist)) + return benchmarks -# 1 find lines with Read memfile and extract test name -# 2 parse counters into a list of (name, value) tuples (dictionary maybe?) -# 3 process into useful data - # cache hit rates - # cache fill time - # branch predictor status - # hazard counts - # CPI - # instruction distribution +def FormatToPlot(currBenchmark): + names = [] + values = [] + for config in currBenchmark: + print ('config' , config) + names.append(config[0]) + values.append(config[1]) + return (names, values) -# steps 1 and 2 -benchmarks = [] -transcript = open(sys.argv[1], 'r') -HPMClist = { } -testName = '' -for line in transcript.readlines(): - lineToken = line.split() - if(len(lineToken) > 3 and lineToken[1] == 'Read' and lineToken[2] == 'memfile'): - opt = lineToken[3].split('/')[-4] - testName = lineToken[3].split('/')[-1].split('.')[0] - HPMClist = { } - elif(len(lineToken) > 4 and lineToken[1][0:3] == 'Cnt'): - countToken = line.split('=')[1].split() - value = countToken[0] - name = ' '.join(countToken[1:]) - HPMClist[name] = value - elif ('is done' in line): - benchmarks.append((testName, opt, HPMClist)) +if(sys.argv[1] == '-b'): + 
configList = [] + for config in sys.argv[2::]: + benchmarks = ProcessFile(config) + ComputeAll(benchmarks) + configList.append((config.split('.')[0], benchmarks)) -#print(benchmarks[0]) + # Merge all configruations into a single list + benchmarkAll = [] + for (config, benchmarks) in configList: + print(config) + for benchmark in benchmarks: + (nameString, opt, dataDict) = benchmark + benchmarkAll.append((nameString, opt, config, dataDict)) -for benchmark in benchmarks: - ComputeCPI(benchmark) - ComputeBranchDirMissRate(benchmark) - ComputeBranchTargetMissRate(benchmark) - ComputeRASMissRate(benchmark) - ComputeInstrClassMissRate(benchmark) - ComputeICacheMissRate(benchmark) - ComputeDCacheMissRate(benchmark) - printStats(benchmark) + # now extract all branch prediction direction miss rates for each + # namestring + opt, config + benchmarkDict = { } + for benchmark in benchmarkAll: + (name, opt, config, dataDict) = benchmark + if name+'_'+opt in benchmarkDict: + benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) + else: + benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] + + size = len(benchmarkDict) + index = 1 + print('Number of plots', size) + for benchmarkName in benchmarkDict: + currBenchmark = benchmarkDict[benchmarkName] + (names, values) = FormatToPlot(currBenchmark) + print(names, values) + plt.subplot(6, 7, index) + plt.bar(names, values) + plt.title(benchmarkName) + plt.ylabel('BR Dir Miss Rate (%)') + #plt.xlabel('Predictor') + index += 1 + #plt.tight_layout() + plt.show() + + +else: + # steps 1 and 2 + benchmarks = ProcessFile(sys.argv[1]) + # 3 process into useful data + # cache hit rates + # cache fill time + # branch predictor status + # hazard counts + # CPI + # instruction distribution + ComputeAll(benchmarks) + for benchmark in benchmarks: + printStats(benchmark)