From afcddf7035f089ac379797155ed3ba776e36b169 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 19 Jul 2022 23:44:37 +0000 Subject: [PATCH 1/2] optimized zeros and replaced complex ?: with always_comb --- pipelined/src/fpu/fcvt.sv | 21 ++++++--- pipelined/src/fpu/fmashiftcalc.sv | 4 -- pipelined/src/fpu/negateintres.sv | 9 +++- pipelined/src/fpu/resultsign.sv | 25 ++++++++--- pipelined/src/fpu/shiftcorrection.sv | 5 ++- pipelined/src/fpu/specialcase.sv | 65 ++++++++++++++++++---------- 6 files changed, 85 insertions(+), 44 deletions(-) diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index b9932523..9d7f2d62 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -103,7 +103,7 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} : {Xm, {`CVTLEN-`NF{1'b0}}}; assign LzcIn = LzcInFull[`CVTLEN-1:0]; @@ -125,9 +125,10 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : - ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros); + always_comb + if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}}; + else if (ResDenormUf&~IntToFp) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; + else ShiftAmt = LeadingZeros; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -150,7 +151,9 @@ module fcvt ( assign NewBias = ToInt ? 
(`NE-1)'(1) : (`NE-1)'(`BIAS); end else if (`FPSIZES == 2) begin - assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + logic [`NE-2:0] NewBiasToFp; + assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; end else if (`FPSIZES == 3) begin logic [`NE-2:0] NewBiasToFp; @@ -177,7 +180,7 @@ module fcvt ( // select the old exponent // int -> fp : largest bias + XLEN // fp -> ??? : XExp - assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe; + assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe; // calculate CalcExp // fp -> fp : @@ -222,7 +225,11 @@ module fcvt ( // - if 64-bit : check the msb of the 64-bit integer input and if it's signed // - if 32-bit : check the msb of the 32-bit integer input and if it's signed // - otherwise: the floating point input's sign - assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs; + always_comb + if(IntToFp) + if(Int64) Cs = Int[`XLEN-1]&Signed; + else Cs = Int[31]&Signed; + else Cs = Xs; endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 79953b21..d598efb7 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -42,7 +42,6 @@ module fmashiftcalc( output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero ); - logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias logic [`NE+1:0] BiasCorr; @@ -149,9 +148,6 @@ module fmashiftcalc( // Determine if the result is denormal // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero; - // Determine the shift needed for denormal results - // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = 
FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/negateintres.sv b/pipelined/src/fpu/negateintres.sv index dde515b9..7a696b37 100644 --- a/pipelined/src/fpu/negateintres.sv +++ b/pipelined/src/fpu/negateintres.sv @@ -42,7 +42,12 @@ module negateintres( // round and negate the positive res if needed assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; - assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] : - Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32]; + always_comb + if(Signed) + if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN:`XLEN-1]; + else CvtNegResMsbs = CvtNegRes[32:31]; + else + if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN+1:`XLEN]; + else CvtNegResMsbs = CvtNegRes[33:32]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index b8019b98..c2912ece 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -46,11 +46,21 @@ module resultsign( logic Zeros; logic Infs; - // Determine the sign if the sum is zero - // if cancelation then 0 unless round to -infinity - // if multiply then Psgn - // otherwise psign - assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? 
Frm[1:0] == 2'b10 : FmaPs; + // The IEEE754-2019 standard specifies: + // - the sign of an exact zero sum (with operands of different signs) should be positive unless rounding toward negative infinity + // - when the exact result of an FMA operation is non-zero, but is zero due to rounding, use the sign of the exact result + // - if x = +0 or -0 then x+x=x and x-(-x)=x + // - the sign of a product is the exclusive or of the operands' signs + // Zero sign will only be selected if: + // - P=Z and a cancelation occurs - exact zero + // - Z is zero and P is zero - exact zero + // - P is killed and Z is zero - Psgn + // - Z is killed and P is zero - impossible + // Zero sign calculation: + // - if a multiply operation is done, then use the product's sign (Ps) + // - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign) + // - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign + assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive @@ -58,6 +68,9 @@ module resultsign( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign Infs = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms; + always_comb + if(InfIn&FmaOp) Ws = Infs; + else if(FmaSZero&FmaOp) Ws = Zeros; + else Ws = Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 50cffb07..514edbee 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -55,7 +55,10 @@ module shiftcorrection( // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? 
Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + always_comb + if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; + else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted; + else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv index 3c28eae2..6014962a 100644 --- a/pipelined/src/fpu/specialcase.sv +++ b/pipelined/src/fpu/specialcase.sv @@ -95,9 +95,14 @@ module specialcase( end else begin assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end - - assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : - OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + + always_comb + if(OutFmt) + if(OfResMax) OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; + else OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + else + if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; + else OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; assign UfRes = OutFmt ? 
{Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; @@ -234,20 +239,21 @@ module specialcase( assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); // output infinity with result sign if divide by zero - if(`IEEE754) begin - assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes : - YNaN&~CvtOp ? YNaNRes : - ZNaN&FmaOp ? ZNaNRes : - Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end else begin - assign PostProcRes = NaNIn|Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end + if(`IEEE754) + always_comb + if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes; + else if(YNaN&~CvtOp) PostProcRes = YNaNRes; + else if(ZNaN&FmaOp) PostProcRes = ZNaNRes; + else if(Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; + else + always_comb + if(NaNIn|Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; /////////////////////////////////////////////////////////////////////////////////////// // @@ -272,10 +278,17 @@ module specialcase( // unsigned | 2^32-1 | 2^64-1 | // // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive - Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - Xs&~XNaN ? 
{`XLEN{1'b0}} : // unsigned negitive - {`XLEN{1'b1}};// unsigned positive + always_comb + if(Signed) + if(Xs&~XNaN) // signed negative + if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; + else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; + else // signed positive + if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; + else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; + else + if(Xs&~XNaN) OfIntRes = {`XLEN{1'b0}}; // unsigned negative + else OfIntRes = {`XLEN{1'b1}}; // unsigned positive // select the integer output @@ -284,7 +297,11 @@ module specialcase( // - if rounding and signed opperation and negitive input, output -1 // - otherwise output a rounded 0 // - otherwise output the normal res (trmined and sign extended if nessisary) - assign FCvtIntRes = IntInvalid ? OfIntRes : - CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? - Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; + always_comb + if(IntInvalid) FCvtIntRes = OfIntRes; + else if(CvtCe[`NE]) + if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}}; + else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1}; + else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0]; + else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; endmodule \ No newline at end of file From db39a05abc7bd3f7519d62f44413dfdd6c3f8b45 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 20 Jul 2022 01:36:25 +0000 Subject: [PATCH 2/2] small changes --- pipelined/src/fpu/divsqrt.sv | 2 +- pipelined/src/fpu/srt.sv | 12 ++++++------ pipelined/srt/srt.sv | 29 +++++++++++++++++------------ 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index cbf7f95f..ffc60026 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -41,7 +41,7 @@ module divsqrt( input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, + 
input logic StallE, output logic DivStickyM, output logic DivBusy, output logic DivDone, diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 9e031511..ee5ae9a3 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -34,18 +34,18 @@ module srt( input logic clk, input logic DivStart, input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, + input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1-(`RADIX/4):0] Quot, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem ); diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 949335bf..13a59d84 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -55,7 +55,7 @@ module srt ( logic qp, qz, qn; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; - logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn; + logic [`DIVLEN+3:0] X, Dpreproc, C, F, S, SM, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic intSign; @@ -90,8 +90,9 @@ module srt ( // If only implementing division, use divide otfc // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot); // otherwise use sotfc - creg sotfcC(clk, Start, C); - sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F); + creg sotfcC(clk, Start, Sqrt, C); + sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM); + fsel2 fsel(qp, qn, C, S, SM, F); // Adder input selection assign AddIn = Sqrt ? 
F : Dsel; @@ -214,11 +215,16 @@ module fsel2 ( // Generate for both positive and negative bits assign FP = ~S & C; assign FN = SM | (C & (~C << 2)); - assign FZ = {(`DIVLEN+4){1'b0}}; + assign FZ = '0; // Choose which adder input will be used - assign F = sp ? FP : (sn ? FN : FZ); + always_comb + if (sp) F = FP; + else if (sn) F = FN; + else F = FZ; + + // assign F = sp ? FP : (sn ? FN : FZ); endmodule @@ -266,17 +272,18 @@ module sotfc2( input logic clk, input logic Start, input logic sp, sn, + input logic Sqrt, input logic [`DIVLEN+3:0] C, output logic [`DIVLEN-2:0] Sq, - output logic [`DIVLEN+3:0] F + output logic [`DIVLEN+3:0] S, SM ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. // Use this otfc for division and square root. - logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; + logic [`DIVLEN+3:0] SNext, SMNext, SMux; flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin @@ -292,9 +299,6 @@ module sotfc2( end end assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; - - fsel2 fsel(sp, sn, C, S, SM, F); - endmodule ////////////////////////// @@ -302,11 +306,12 @@ endmodule ////////////////////////// module creg(input logic clk, input logic Start, + input logic Sqrt, output logic [`DIVLEN+3:0] C ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule