// Forked from Github_Repos/cvw -- SystemVerilog, executable file, 266 lines, 12 KiB.
// rounder
//
// Rounds the 64-bit value A, adjusts the exponent, detects overflow and
// underflow, and packs the final 64-bit Result together with the
// exception Flags and the DenormIO indication.
//
// Selected inputs:
//   A      64-bit value to be rounded; A[62:11] is the 52-bit field that
//          goes through the rounding incrementer
//   Aexp   exponent of the value to be rounded
//   Asign  sign used for the result
//   P      precision select: 1 = single precision, 0 = double precision
//   rm     3-bit rounding mode
//
// Rounding modes while rm[2] = 0:
//   rm[1:0]  Mode
//   00       round-to-nearest-even
//   01       round-toward-zero
//   10       round-toward-plus infinity
//   11       round-toward-minus infinity
// While rm[2] = 1, one is added whenever the round bit R is set.
//
// The decision to add '1' to the truncated significand is based on three
// bits (least (L), round (R) and sticky (S)), the rounding mode (rm) and
// the sign of the result (Asign).  Visually, L and R appear as
//   xxxxxL,Rxxxxxxx
// where , denotes the rounding boundary.  S is the logical OR of all the
// bits to the right of R.

module rounder (Result, DenormIO, Flags, rm, P, OvEn,
                UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp,
                norm_shift, A, exponent_postsum, A_Norm, B_Norm, exp_A_unmodified, exp_B_unmodified,
                normal_overflow, normal_underflow, swap, op_type, sum);

   // ---------------------------------------------------------------
   // Ports
   // ---------------------------------------------------------------
   input  [2:0]  rm;                 // rounding mode
   input         P;                  // 1 = single precision, 0 = double
   input         OvEn;               // overflow trap enable
   input         UnEn;               // underflow trap enable
   input         exp_valid;
   input  [3:0]  sel_inv;            // special-case selector (see Valid/NaN/Infinite)
   input         Invalid;            // passed straight through to Flags[1]
   input         DenormIn;
   input         convert;
   input         Asign;
   input  [10:0] Aexp;
   input  [5:0]  norm_shift;
   input  [63:0] A;
   input  [10:0] exponent_postsum;
   input         A_Norm;
   input         B_Norm;
   input  [11:0] exp_A_unmodified;
   input  [11:0] exp_B_unmodified;
   input         normal_overflow;
   input         normal_underflow;
   input         swap;               // not referenced inside this module
   input  [3:0]  op_type;
   input  [63:0] sum;                // only sum[63] is used here

   output [63:0] Result;
   output        DenormIO;
   output [4:0]  Flags;

   // ---------------------------------------------------------------
   // Internal nets
   // ---------------------------------------------------------------
   wire          Rsign;
   wire          Sticky_out;         // declared but not used in this module
   wire [51:0]   ShiftMant;
   wire [63:0]   ShiftMant_64;       // declared but not used in this module
   wire [10:0]   Rexp;
   wire [10:0]   Rexp_denorm;
   wire [11:0]   Texp;               // tentative exponent
   wire [11:0]   Texp_addone;        // Texp + normal_overflow
   wire [11:0]   Texp_subone;        // output of the cla_sub12 instance
   wire [51:0]   Rmant;
   wire [51:0]   Tmant;
   wire          Rzero;
   wire [51:0]   B;                  // increment operand for the mantissa adder
   wire [11:0]   B_12_overflow;      // increment operand for the exponent adder
   wire [11:0]   B_12_underflow;     // operand for the exponent subtractor
   wire          S_SP;               // single precision sticky bit
   wire          S_DP;               // double precision sticky bit
   wire          S;                  // selected sticky bit
   wire          R;                  // round bit
   wire          L;                  // least significant kept bit
   wire          add_one;            // '1' if the mantissa must be incremented
   wire          UnFlow_SP, UnFlow_DP, UnderFlow;
   wire          OvFlow_SP, OvFlow_DP, OverFlow;
   wire          Inexact;
   wire          Round_zero;
   wire          Infinite;
   wire          VeryLarge;
   wire          Largest;
   wire          Adj_exp;
   wire          Valid;
   wire          NaN;
   wire          Cout;
   wire          Cout_overflow;
   wire          Texp_l7z;
   wire          Texp_l7o;
   wire          OvCon;

   // ---------------------------------------------------------------
   // L / R / S extraction
   // ---------------------------------------------------------------
   // Double precision sticky covers A[9:0]; single precision sticky
   // covers everything below its round bit, i.e. A[38:0].
   assign S_DP = |A[9:0];
   assign S_SP = |A[38:0];

   assign L = P ? A[40] : A[11];
   assign R = P ? A[39] : A[10];
   assign S = P ? S_SP  : S_DP;

   // ---------------------------------------------------------------
   // Increment decision
   // ---------------------------------------------------------------
   // rm[2]=0, rm[1:0]=00 : increment when R and (L or S)
   // rm[2]=0, rm[1]=1    : increment when Asign equals rm[0] and (R or S)
   // rm[2]=1             : increment when R
   wire inc_nearest  = ~rm[1] & ~rm[0] & R & (L | S);
   wire inc_directed = rm[1] & ~(Asign ^ rm[0]) & (R | S);

   assign add_one = (~rm[2] & (inc_nearest | inc_directed)) | (rm[2] & R);

   // The increment lands on bit 0 of the adder for double precision and
   // on bit 29 for single precision.  The leading one of a normalized
   // value is not part of the addition.
   assign B              = {22'b0, add_one & P, 28'b0, add_one & ~P};
   assign B_12_overflow  = {11'b0, normal_overflow};
   assign B_12_underflow = {11'b0, normal_underflow};

   cla52     add1     (Tmant, Cout, A[62:11], B);                           //***adder
   cla12     add1_exp (Texp_addone, Cout_overflow, Texp, B_12_overflow);    //***adder
   cla_sub12 sub1_exp (Texp_subone, Texp, B_12_underflow);                  //***adder

   // ---------------------------------------------------------------
   // Tentative exponent
   // ---------------------------------------------------------------
   // With a denormalized input the exponent comes from exponent_postsum.
   // Otherwise it is Aexp - norm_shift + 2 + Cout.
   assign Texp = DenormIn ? {1'b0, exponent_postsum}
                          : ({1'b0, Aexp} - {6'b0, norm_shift} + {10'b0, 1'b1, Cout});

   // ---------------------------------------------------------------
   // Overflow / underflow detection
   // ---------------------------------------------------------------
   // The low seven exponent bits are examined once and shared between
   // the single and double precision checks.
   assign Texp_l7o = &Texp[6:0];
   assign Texp_l7z = ~|Texp[6:0];

   // Double precision: overflow when Texp[10:0] is all ones.
   assign OvFlow_DP = (&Texp[10:7]) & Texp_l7o;

   // Single precision: overflow when Texp[10] is set and either any of
   // Texp[9:7] is set or Texp[6:0] is all ones.
   assign OvFlow_SP = Texp[10] & ((|Texp[9:7]) | Texp_l7o);

   // Double precision: underflow when Texp[11] is set or Texp[10:0] is
   // all zeros.
   assign UnFlow_DP = Texp[11] | ((~|Texp[10:7]) & Texp_l7z);

   // Single precision: underflow when Texp[10] is clear and either any
   // of Texp[9:7] is clear or Texp[6:0] is all zeros.
   assign UnFlow_SP = ~Texp[10] & ((~&Texp[9:7]) | Texp_l7z);

   // sel_inv[2:0] = 000 marks an ordinary (valid) result,
   // sel_inv[2:0] = 001 marks a NaN result.
   assign Valid = ~|sel_inv[2:0];
   assign NaN   = (~|sel_inv[2:1]) & sel_inv[0];

   // Underflow is also raised when sel_inv[3] is set and Aexp equals
   // 11'h380.
   wire aexp_is_380 = ~Aexp[10] & (&Aexp[9:7]) & (~|Aexp[6:0]);

   assign UnderFlow = (((P & UnFlow_SP) | UnFlow_DP) & Valid & exp_valid)
                    | (aexp_is_380 & sel_inv[3]);
   assign OverFlow  = ((P & OvFlow_SP) | OvFlow_DP) & Valid & ~UnderFlow & exp_valid;

   // DenormIO: a denormalized input was seen or the result underflowed.
   assign DenormIO = DenormIn | UnderFlow;

   // Inexact: something was rounded away (R or S), or the result
   // overflowed, or it underflowed with the underflow trap disabled --
   // and the result was not already fixed by a special case.
   assign Inexact = (R | S | OverFlow | (UnderFlow & ~UnEn)) & Valid;

   // Flags[4] = UnderFlow, Flags[3] = 0, Flags[2] = OverFlow,
   // Flags[1] = Invalid,   Flags[0] = Inexact.
   assign Flags = {UnderFlow, 1'b0, OverFlow, Invalid, Inexact};

   // ---------------------------------------------------------------
   // Result sign
   // ---------------------------------------------------------------
   // The result is forced to zero when exp_valid is low or on underflow.
   //
   // Zero handling as described by the original author (applies equally
   // to subtraction, although signs may alter the operands' signs):
   //   -0 + -0 = -0 (always)
   //   +0 + +0 = +0 (always)
   //   -0 + +0 = +0 (for RN, RZ, RU)
   //   -0 + +0 = -0 (for RD)
   assign Rzero = ~exp_valid | UnderFlow;

   wire op_low_zero    = ~|op_type[2:0];
   wire norm_mismatch  = A_Norm ^ B_Norm;
   wire exp_msb_differ = exp_A_unmodified[11] ^ exp_B_unmodified[11];

   // Denormalized path: Asign is inverted under one of two conditions,
   // chosen by whether op_type[2:0] is zero.
   wire flip_when_zero = sum[63] & (A_Norm | B_Norm) & exp_msb_differ;
   wire flip_otherwise = norm_mismatch & ~exp_msb_differ & normal_underflow;
   wire sign_denorm    = op_low_zero ? (Asign ^ flip_when_zero)
                                     : (Asign ^ flip_otherwise);

   // Normal path.
   wire rm_low_ones  = rm[1] & rm[0];
   wire sign_special = (sel_inv[2] & ~sel_inv[1] &  sel_inv[0] & rm_low_ones)
                     | (sel_inv[2] &  sel_inv[1] & ~sel_inv[0])
                     | (~exp_valid & rm_low_ones & ~sel_inv[2])
                     | (UnderFlow & rm_low_ones);
   wire sign_normal  = (((Asign & exp_valid) | (sign_special & ~convert)) & ~sel_inv[3])
                     | (Asign & sel_inv[3]);

   assign Rsign = DenormIn ? sign_denorm : sign_normal;

   // ---------------------------------------------------------------
   // Result exponent
   // ---------------------------------------------------------------
   // Round_zero: the selected mode rounds the magnitude toward zero for
   // this sign.
   assign Round_zero = (~rm[1] & rm[0]) | (~Asign & rm[0]) | (Asign & rm[1] & ~rm[0]);
   assign VeryLarge  = OverFlow & ~OvEn;
   assign Infinite   = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]);
   assign Largest    = VeryLarge & Round_zero;
   assign Adj_exp    = OverFlow & OvEn & ~convert;

   // With Adj_exp set, bits 10 and 9 are cleared, and for single
   // precision bits 7 and 6 are cleared as well.  Bit 7 is XORed with P.
   wire       adj_sp      = Adj_exp & P;
   wire [9:0] exp_hi_adj  = {Texp[10] & ~Adj_exp,
                             Texp[9]  & ~Adj_exp,
                             Texp[8],
                             (Texp[7] ^ P) & ~adj_sp,
                             Texp[6] & ~adj_sp,
                             Texp[5:1]};
   wire       exp_nonzero = ~Rzero | NaN;

   // Upper bits are forced to ones for a non-valid selection or an
   // untrapped overflow, and forced to zeros for a zero result that is
   // not a NaN.
   assign Rexp[10:1] = ({10{~Valid | VeryLarge}} | exp_hi_adj) & {10{exp_nonzero}};

   // Bit 0 is additionally cleared when the result is the largest
   // finite number.
   assign Rexp[0] = (~Valid | Texp[0] | Infinite) & exp_nonzero & ~Largest;

   // Denormalized exponent: selects among Texp, Texp_addone and
   // Texp_subone using normal_overflow / normal_underflow.  Whether the
   // subtract-one option is available depends on op_type[2:0], on
   // A_Norm/B_Norm differing, and on bit 11 of the unmodified exponents.
   wire op_is_001 = (~|op_type[2:1]) & op_type[0];

   wire [10:0] exp_up_or_down = (normal_overflow ^ normal_underflow)
                              ? (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0])
                              : Texp[10:0];
   wire [10:0] exp_up_only    = normal_overflow ? Texp_addone[10:0] : Texp[10:0];

   wire use_up_or_down = norm_mismatch & (op_is_001 ? ~exp_msb_differ : exp_msb_differ);

   assign Rexp_denorm = DenormIn    ? (use_up_or_down ? exp_up_or_down : exp_up_only)
                      : op_type[3]  ? exp_A_unmodified[10:0]   //KEP used to be all of exp_A_unmodified
                      :               Rexp;

   // ---------------------------------------------------------------
   // Result mantissa
   // ---------------------------------------------------------------
   // Zero or infinity : all zeros
   // NaN              : 10...0
   // Largest finite   : all ones
   // Otherwise        : the rounded mantissa Tmant
   wire mant_pass = ~Infinite & ~Rzero;

   assign Rmant[51]   = Largest | NaN | (Tmant[51] & mant_pass);
   assign Rmant[50:0] = {51{Largest}} | (Tmant[50:0] & {51{mant_pass & Valid}});

   // On the denormalized path the mantissa is taken directly from A.
   assign ShiftMant = A[51:0];

   // ---------------------------------------------------------------
   // Result packing
   // ---------------------------------------------------------------
   // A double precision result is returned for a single precision
   // operation when overflow has occurred, the overflow trap is enabled
   // and a conversion is being performed.
   assign OvCon = OverFlow & OvEn & convert;

   wire [63:0] packed_denorm = {Rsign, Rexp_denorm, ShiftMant};
   // Single precision uses the low 8 exponent bits and the top 23
   // mantissa bits; the low 32 bits of the word are zero.
   wire [63:0] packed_single = {Rsign, Rexp[7:0], Rmant[51:29], 32'b0};
   wire [63:0] packed_double = {Rsign, Rexp, Rmant};

   // op_type[3] bypasses the rounder and returns A unchanged.
   assign Result = op_type[3]    ? A
                 : DenormIn      ? packed_denorm
                 : (P & ~OvCon)  ? packed_single
                 :                 packed_double;

endmodule // rounder