Various bugs fixed in FMA

This commit is contained in:
Katherine Parry 2021-04-13 18:27:13 +00:00
parent 03bb37a849
commit ef011496a7
20 changed files with 136386 additions and 307226 deletions

View File

@ -48,7 +48,7 @@ module add(r, s, t, sum,
// Compound adder
// Consists of 3:2 CSA followed by long compound CPA
assign prodshifted = killprod ? 0 : {56'b0, r2, 2'b0} + {56'b0, s2, 2'b0};
assign prodshifted = killprod ? 0 : {56'b0, r2+s2, 2'b0};
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above

View File

@ -56,7 +56,7 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
// addend on right shifts. Handle special cases of shifting
// by too much.
always @(aligncnt or zman or zdenorm)
always @(aligncnt or xzero or yzero or zman or zdenorm or zzero)
begin
// Default to clearing sticky bits
@ -67,26 +67,23 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
killprod = xzero | yzero;
// d = aligncnt
// p = 53
if ($signed(aligncnt) <= $signed(-103)) begin //d<=-2p+1
if ($signed(aligncnt) <= $signed(-105)) begin //d<=-2p+1
//product anchored case with saturated shift
sumshift = 163; // 3p+4
sumshiftzero = 0;
shift = {~zdenorm,zman,163'b0} >> sumshift;
shift = {1'b1,zman,163'b0} >> sumshift;
t = zzero ? 0 : {shift[215:52]};
bs = |(shift[51:0]);
//zexpsel = 0;
end else if($signed(aligncnt) <= $signed(1)) begin // -2p+1<d<=2
// set d<=2 to d<=0
end else if($signed(aligncnt) <= $signed(2)) begin // -2p+1<d<=2
// product anchored or cancellation
// warning: set to 55 rather than 56. was there a typo in the book?
sumshift = 57-aligncnt; // p + 3 - d
sumshift = 57-aligncnt; // p + 2 - d
sumshiftzero = 0;
shift = {~zdenorm,zman,163'b0} >> sumshift;
t = zzero ? 0 : {shift[215:52]};
bs = |(shift[51:0]);
//zexpsel = 0;
end else if ($signed(aligncnt)<=$signed(55)) begin // 2 < d <= p+2
// another typo in book? above was 55 changed to 52
// addend anchored case
// used to be 56 \/ something doesn't seem right; too many typos
sumshift = 57-aligncnt;

View File

@ -0,0 +1,55 @@
module booth(xExt, choose, add1, e, pp);
/////////////////////////////////////////////////////////////////////////////
// Radix-4 Booth partial-product selector.
//
// A 3-bit window of the multiplier (`choose`) selects one multiple of the
// multiplicand out of {0, +1, +1, +2, -2, -1, -1, -0}.  Negative multiples
// are produced as a bitwise inversion in `pp` plus a small correction in
// `add1`, so that for every selector value
//
//     pp + add1 == multiple * xExt   (mod 2^55)
//
// Ports:
//   xExt   - multiplicand
//   choose - 3-bit Booth window that picks the multiple
//   add1   - correction the caller must add at the LSB of this partial
//            product to finish the negation (0 for non-negative multiples)
//   e      - 1 when a negative multiple (including -0) is selected;
//            identical to choose[2]
//   pp     - the selected (possibly inverted) multiple
/////////////////////////////////////////////////////////////////////////////
input [53:0] xExt; // multiplicand xExt
input [2:0] choose; // bits needed to choose which encoding
output [1:0] add1; // do you add 1
output e;
output [54:0] pp; // the resultant encoding

logic [54:0] pp;
logic e;
logic [1:0] add1;
logic [53:0] negx; // bitwise inverse of the multiplicand

assign negx = ~xExt;

// One table drives all three outputs so the encodings cannot drift apart.
// always_comb derives the sensitivity list from the signals actually read.
always_comb
  case (choose)
    3'b000 : begin pp = 55'b0;        e = 1'b0; add1 = 2'b00; end //  0
    3'b001 : begin pp = {1'b0, xExt}; e = 1'b0; add1 = 2'b00; end //  1
    3'b010 : begin pp = {1'b0, xExt}; e = 1'b0; add1 = 2'b00; end //  1
    3'b011 : begin pp = {xExt, 1'b0}; e = 1'b0; add1 = 2'b00; end //  2
    // {negx,1'b0} is ~(2x) - 1, so +2 completes the two's complement
    3'b100 : begin pp = {negx, 1'b0}; e = 1'b1; add1 = 2'b10; end // -2
    3'b101 : begin pp = {1'b1, negx}; e = 1'b1; add1 = 2'b01; end // -1
    3'b110 : begin pp = {1'b1, negx}; e = 1'b1; add1 = 2'b01; end // -1
    // all ones + 1 wraps to zero; replication avoids an over-wide literal
    3'b111 : begin pp = {55{1'b1}};   e = 1'b1; add1 = 2'b01; end // -0
    // X/Z on the selector: propagate X instead of holding a stale value
    default: begin pp = {55{1'bx}};   e = 1'bx; add1 = 2'bxx; end
  endcase

endmodule

View File

@ -0,0 +1,90 @@
module add3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 3:2 carry-save compressor.
//
// Instantiates one sng3comp2 cell per bit position.  carry[k] is the carry
// produced by bit k and is NOT shifted here; a caller that wants the
// arithmetic value must weight the carry vector by two itself.
//
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////
parameter BITS = 4;

input  [BITS-1:0] a, b, c;     // the three operands to compress
output [BITS-1:0] carry, sum;  // per-bit carry and sum outputs

genvar k;

generate
  for (k = 0; k < BITS; k = k + 1) begin
    sng3comp2 add0(
      .a    (a[k]),
      .b    (b[k]),
      .c    (c[k]),
      .carry(carry[k]),
      .sum  (sum[k])
    );
  end
endgenerate

endmodule
module add4comp2(a, b, c, d, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 4:2 carry-save compressor built from a chain of sng4comp2 cells.
//
// Each cell passes its intermediate carry (cout) to the next more
// significant cell as cin; bit 0 receives cin = 0.
//
// Output contract:
//     a + b + c + d == sum + 2 * carry
// carry[k] is the carry out of bit k (weight 2^(k+1)); it is not shifted
// here, so the caller applies the factor of two.
//
// Requires BITS >= 2 (the top cell reads cout[BITS-2]).
/////////////////////////////////////////////////////////////////////////////
parameter BITS = 4;
input [BITS-1:0] a;
input [BITS-1:0] b;
input [BITS-1:0] c;
input [BITS-1:0] d;
output [BITS:0] carry;   // one bit wider than the operands, see below
output [BITS-1:0] sum;

logic [BITS-1:0] cout;   // inter-cell carry chain
logic carryTmp;          // carry output of the most significant cell

genvar i;

// Least significant cell: nothing to carry in.
sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);

// Middle cells: cin comes from the previous cell's cout.
generate
  for(i= 1; i<BITS-1; i=i+1) begin
    sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
  end
endgenerate

// Most significant cell.
sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);

// carryTmp and cout[BITS-1] both have weight 2^BITS and there is no further
// cell to absorb cout, so they are merged with a half adder: the XOR keeps
// weight 2^BITS (carry[BITS-1]) and the AND moves up to 2^(BITS+1)
// (carry[BITS]).  The previous code had these two swapped.
assign carry[BITS-1] = carryTmp ^ cout[BITS-1];
assign carry[BITS]   = carryTmp & cout[BITS-1];

endmodule
module sng3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// Single-bit 3:2 compressor (full adder):  {carry, sum} = a + b + c.
//
// The carry is formed with a mux rather than a majority gate: when a and b
// differ the carry follows c, otherwise it follows a.
//
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////
input  a, b, c;
output carry, sum;

logic abDiffer;  // high when exactly one of a, b is set

assign abDiffer = a ^ b;
assign carry    = abDiffer ? c : a;
assign sum      = abDiffer ^ c;

endmodule
module sng4comp2(a, b, c, d, cin, cout, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// Single-bit 4:2 compressor made of two cascaded sng3comp2 cells.
//
//   stage 1 compresses a, b, c      -> cout  and an intermediate sum
//   stage 2 compresses that sum, d, cin -> carry and sum
//
// cout is produced by stage 1 alone, so cin does not feed into cout.
//
// TODO: look into pass gate 4:2 counters?
/////////////////////////////////////////////////////////////////////////////
input  a, b, c, d;
input  cin;
output cout;
output carry;
output sum;

logic TmpSum;  // stage-1 sum, fed to stage 2

sng3comp2 add1(
  .a    (a),
  .b    (b),
  .c    (c),
  .carry(cout),
  .sum  (TmpSum)
);

sng3comp2 add2(
  .a    (TmpSum),
  .b    (d),
  .c    (cin),
  .carry(carry),
  .sum  (sum)
);

endmodule

View File

@ -17,7 +17,7 @@
/////////////////////////////////////////////////////////////////////////////
module expgen(xexp, yexp, zexp,
killprod, sumzero, resultdenorm, normcnt, infinity,
invalid, overflow, underflow, inf, xzero, yzero,expplus1,
FmaFlagsM, inf, xzero, yzero,expplus1,
nan, de0, xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, specialsel, zexpsel,
aligncnt, wexp,
prodof, sumof, sumuf, denorm0, ae);
@ -31,9 +31,7 @@ module expgen(xexp, yexp, zexp,
input resultdenorm; // postnormalize rounded result
input [8:0] normcnt; // normalization shift count
input infinity; // generate infinity on overflow
input invalid; // Result invalid
input overflow; // Result overflowed
input underflow; // Result underflowed
input [4:0] FmaFlagsM; // Status flags: [4] invalid, [3] divide by zero, [2] overflow, [1] underflow, [0] inexact
input inf; // Some input is infinity
input nan; // Some input is NaN
input [12:0] de0; // X is NaN NaN
@ -121,10 +119,10 @@ module expgen(xexp, yexp, zexp,
// produces either infinity or the largest finite number, depending on the
// rounding mode. NaNs are propagated or generated.
assign specialres = invalid | nan ? nanres : // KEP added nan
overflow ? infinityres :
assign specialres = FmaFlagsM[4] | nan ? nanres : // invalid
FmaFlagsM[2] ? infinityres : //overflow
inf ? 11'b11111111111 :
underflow ? 11'b0 : 11'bx;
FmaFlagsM[1] ? 11'b0 : 11'bx; //underflow
assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

View File

@ -10,12 +10,13 @@
/////////////////////////////////////////////////////////////////////////////
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
psign, zsign, xzero, yzero, zzero, vbits, killprod,
inf, nan, invalid, overflow, underflow, inexact);
inf, nan, FmaFlagsM,sticky);
/////////////////////////////////////////////////////////////////////////////
input xnan; // X is NaN
input ynan; // Y is NaN
input znan; // Z is NaN
input sticky; // sticky bit
input xinf; // X is Inf
input yinf; // Y is Inf
input zinf; // Z is Inf
@ -31,10 +32,7 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
input [1:0] vbits; // R and S bits of result
output inf; // Some source is Inf
output nan; // Some source is NaN
output invalid; // Result is invalid
output overflow; // Result overflowed
output underflow; // Result underflowed
output inexact; // Result is not an exact number
output [4:0] FmaFlagsM;
// Internal nodes
@ -55,33 +53,36 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
assign prodinf = prodof && ~xnan && ~ynan;
//KEP added if the product is infinity then sum is infinity
assign suminf = prodinf | sumof && ~xnan && ~ynan && ~znan;
assign suminf = sumof && ~xnan && ~ynan && ~znan;
// Set invalid flag for following cases:
// 1) Inf - Inf
// 2) 0 * Inf
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
assign invalid = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
xzero && yinf || yzero && xinf;// KEP remove case 3) above
assign FmaFlagsM[3] = 0; // divide by zero flag
// Set the overflow flag for the following cases:
// 1) Rounded multiply result would be out of bounds
// 2) Rounded add result would be out of bounds
assign overflow = suminf && ~inf;
assign FmaFlagsM[2] = suminf && ~inf;
// Set the underflow flag for the following cases:
// 1) Any input is denormalized
// 2) Output would be denormalized or smaller
assign underflow = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
// Set the inexact flag for the following cases:
// 1) Multiplication inexact
// 2) Addition inexact
// One of these cases occurred if the R or S bit is set
assign inexact = (vbits[0] || vbits[1] || suminf) && ~(inf || nan);
assign FmaFlagsM[0] = (vbits[0] || vbits[1] ||sticky || suminf) && ~(inf || nan);
endmodule

View File

@ -15,13 +15,13 @@
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to X or Z inputs
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes W=X*Y+Z, rounded with the mode specified by
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the X or Z inputs for use on the next cycle. In addition, four signals
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
@ -29,29 +29,17 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fmac(x, y, z, rn, rz, rp, rm,
earlyres, earlyressel, bypsel, bypplus1, byppostnorm,
w, wbypass, invalid, overflow, underflow, inexact);
module fma(ReadData1E, ReadData2E, ReadData3E, FrmE,
FmaResultM, FmaFlagsM, aligncnt);
/////////////////////////////////////////////////////////////////////////////
input [63:0] x; // input X from reg file
input [63:0] y; // input Y
input [63:0] z; // input Z from reg file
input rn; // Round to Nearest
input rz; // Round toward zero
input rm; // Round toward minus infinity
input rp; // Round toward plus infinity
input [63:0] earlyres; // Early result from other FP logic
input earlyressel; // Select early result, not W
input [1:0] bypsel; // Select W bypass to X, or z
input bypplus1; // Add one in bypass
input byppostnorm; // postnormalize in bypass
output [63:0] w; // output W=X*Y+Z
output [63:0] wbypass; // prerounded output W=X*Y+Z for bypass
output invalid; // Result is invalid
output overflow; // Result overflowed
output underflow; // Result underflowed
output inexact; // Result is not an exact number
input [63:0] ReadData1E; // input 1
input [63:0] ReadData2E; // input 2
input [63:0] ReadData3E; // input 3
input [2:0] FrmE; // Rounding mode
output [63:0] FmaResultM; // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
output [4:0] FmaFlagsM; // status flags
output [12:0] aligncnt; // shift count for alignment
// Internal nodes
@ -60,12 +48,12 @@ module fmac(x, y, z, rn, rz, rp, rm,
logic [163:0] t; // output of alignment shifter
logic [163:0] sum; // output of carry prop adder
logic [53:0] v; // normalized sum, R, S bits
logic [12:0] aligncnt; // shift count for alignment
// logic [12:0] aligncnt; // shift count for alignment
logic [8:0] normcnt; // shift count for normalizer
logic [12:0] ae; // multiplier expoent
logic bs; // sticky bit of addend
logic ps; // sticky bit of product
logic killprod; // Z >> product
logic killprod; // ReadData3E >> product
logic negsum; // negate sum
logic invz; // invert addend
logic selsum1; // select +1 mode of sum
@ -73,7 +61,7 @@ module fmac(x, y, z, rn, rz, rp, rm,
logic negsum1; // sum +1 < 0
logic sumzero; // sum = 0
logic infinity; // generate infinity on overflow
logic prodof; // X*Y out of range
logic prodof; // ReadData1E*ReadData2E out of range
logic sumof; // result out of range
logic xzero;
logic yzero;
@ -101,6 +89,9 @@ module fmac(x, y, z, rn, rz, rp, rm,
logic [8:0] sumshift;
logic sumshiftzero;
logic [12:0] de0;
logic isAdd;
assign isAdd = 1;
@ -117,16 +108,16 @@ module fmac(x, y, z, rn, rz, rp, rm,
// Instantiate fraction datapath
multiply multiply(.xman(x[51:0]), .yman(y[51:0]), .*);
align align(.zman(z[51:0]),.*);
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
align align(.zman(ReadData3E[51:0]),.*);
add add(.*);
lza lza(.*);
normalize normalize(.zexp(z[62:52]),.*);
round round(.xman(x[51:0]), .yman(y[51:0]),.zman(z[51:0]), .wman(w[51:0]),.wsign(w[63]),.*);
normalize normalize(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*);
round round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]),.zman(ReadData3E[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
// Instantiate exponent datapath
expgen expgen(.xexp(x[62:52]),.yexp(y[62:52]),.zexp(z[62:52]),.wexp(w[62:52]),.*);
expgen expgen(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.wexp(FmaResultM[62:52]),.*);
// Instantiate special case detection across datapath & exponent path
special special(.*);
@ -134,8 +125,8 @@ module fmac(x, y, z, rn, rz, rp, rm,
// Instantiate control logic
sign sign(.xsign(x[63]),.ysign(y[63]),.zsign(z[63]),.wsign(w[63]),.*);
flag flag(.zsign(z[63]),.vbits(v[1:0]),.*);
sign sign(.xsign(ReadData1E[63]),.ysign(ReadData2E[63]),.zsign(ReadData3E[63]),.wsign(FmaResultM[63]),.*);
flag flag(.zsign(ReadData3E[63]),.vbits(v[1:0]),.*);
endmodule

View File

@ -30,7 +30,7 @@ module lza(sum, normcnt, sumzero);
always @ ( sum)
begin
i = 0;
while (~sum[108-i] && i < 108) i = i+1; // search for leading one
while (~sum[163-i] && i <= 163) i = i+1; // search for leading one
normcnt = i; // compute shift count
end

View File

@ -10,8 +10,124 @@ module multiply(xman, yman, xdenorm, ydenorm, xzero, yzero, r, s);
input yzero; // Y is zero
output [105:0] r; // partial product 1
output [105:0] s; // partial product 2
wire [54:0] yExt; //y with appended 0 and assumed 1
wire [53:0] xExt; //x with assumed 1
wire [26:0][1:0] add1;
wire [26:0][54:0] pp;
wire [26:0] e;
logic [17:0][105:0] lv1add;
logic [11:0][105:0] lv2add;
logic [7:0][105:0] lv3add;
logic [3:0][105:0] lv4add;
logic [21:0][106:0] carryTmp;
wire [26:0][105:0] acc;
// wire [105:0] acc
genvar i;
assign r = 106'b0;
assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
assign xExt = {2'b0,~(xdenorm|xzero),xman};
assign yExt = {2'b0,~(ydenorm|yzero),yman, 1'b0};
generate
for(i=0; i<27; i=i+1) begin
booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
end
endgenerate
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]};
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
assign acc[26] = {pp[26],add1[25], 50'b0};
//*** resize adders
generate
for(i=0; i<9; i=i+1) begin
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
end
endgenerate
generate
for(i=0; i<6; i=i+1) begin
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
end
endgenerate
generate
for(i=0; i<4; i=i+1) begin
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
end
endgenerate
generate
for(i=0; i<2; i=i+1) begin
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
end
endgenerate
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
.carry(carryTmp[21]), .sum(s));
assign r = {carryTmp[21][104:0], 1'b0};
// assign r = 0;
// assign s = acc[0] +
// acc[1] +
// acc[2] +
// acc[3] +
// acc[4] +
// acc[5] +
// acc[6] +
// acc[7] +
// acc[8] +
// acc[9] +
// acc[10] +
// acc[11] +
// acc[12] +
// acc[13] +
// acc[14] +
// acc[15] +
// acc[16] +
// acc[17] +
// acc[18] +
// acc[19] +
// acc[20] +
// acc[21] +
// acc[22] +
// acc[23] +
// acc[24] +
// acc[25] +
// acc[26];
// assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
// assign r = 0;
endmodule

View File

@ -14,9 +14,11 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
module normalize(sum, xexp, yexp, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, zzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
/////////////////////////////////////////////////////////////////////////////
input [163:0] sum; // sum
input [62:52] xexp; // exponent of X
input [62:52] yexp; // exponent of Y
input [62:52] zexp; // exponent of Z
input [8:0] normcnt; // normalization shift count
input [12:0] ae; // normalization shift count
@ -33,6 +35,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
input zdenorm; // Input Z is denormalized
input xzero;
input yzero;
input zzero;
output sticky; //sticky bit
output [12:0] de0;
output resultdenorm; // Input Z is denormalized
@ -47,6 +50,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
logic [9:0] sumshifttmp;
logic [163:0] sumshiftedtmp; // shifted sum
logic sticky;
logic isShiftLeft1;
logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// When the sum is zero, normalization does not apply and only the
@ -60,21 +64,23 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// The sticky bit calculation is actually built into the shifter and
// does not require a true subtraction shown in the model.
assign isShiftLeft1 = (aligncnt == 1 ||aligncnt == 0 || $signed(aligncnt) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
assign tmp = ($signed(ae-normcnt+2) >= $signed(-1022));
always @(sum or sumshift or ae or aligncnt or normcnt or bs or zexp or zdenorm)
always @(sum or sumshift or ae or aligncnt or normcnt or bs or isShiftLeft1 or zexp or zdenorm)
begin
// d = aligncnt
// l = normcnt
// p = 53
// ea + eb = ae
// set d<=2 to d<=0
if ($signed(aligncnt)<=$signed(1)) begin //d<=2
if ($signed(aligncnt)<=$signed(2)) begin //d<=2
// product anchored or cancellation
if ($signed(ae-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
//normal result
de0 = xzero|yzero ? zexp : ae-normcnt+2+xdenorm+ydenorm;
resultdenorm = |sum & ~|de0;
sumshifted = resultdenorm ? sum << sumshift : sum << (55+normcnt); // p+2+l
de0 = xzero|yzero ? zexp : ae-normcnt+xdenorm+ydenorm+57;
resultdenorm = |sum & ~|de0 | de0[12];
// if z is zero then there was a 56 bit shift of the product
sumshifted = resultdenorm ? sum << sumshift-zzero+isShiftLeft1 : sum << normcnt; // p+2+l
v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bs;
//de0 = ae-normcnt+2-1023;
@ -90,8 +96,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
sumshifttmp = {1'b0,sumshift} - 2;
sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
tmp2 = (sumshifttmp[9] || sumshifted[162]);
tmp3 = sumshifted[161];
tmp2 = ((sumshifttmp[9] & sumshift[0]) || sumshifted[162]);
tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshift[1]));
tmp4 = sumshifted[160];
tmp5 = sumshifted[159];
// for some reason use exp = zexp + {0,1,2}
@ -112,25 +118,31 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bs;
//de0 = zexp-1;
de0 = zexp;
end else if(sumshifted[160]) begin
v = sumshifted[159:106];
de0 = zexp+zdenorm;
end else if(sumshifted[160]& ~zdenorm) begin
de0 = zexp-1;
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
sticky = (|sumshifted[105:0]) | bs;
//de0 = zexp-1;
de0 = zexp-1;
end else if(sumshifted[159]) begin
v = sumshifted[158:105];
end else if(sumshifted[159]& ~zdenorm) begin
//v = sumshifted[158:105];
de0 = zexp-2;
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
sticky = (|sumshifted[104:0]) | bs;
//de0 = zexp-1;
de0 = zexp-2;
end else begin
end else if(zdenorm) begin
v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bs;
//de0 = zexp-1;
de0 = zexp;
end else begin
de0 = 0;
sumshifted = sum << sumshift-1; // p+2+l
v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bs;
end
resultdenorm = ~(|de0);
resultdenorm = (~|de0 | de0[12]);
end
end

View File

@ -13,22 +13,17 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module round(v, sticky, rz, rn, rp, rm, wsign,
invalid, overflow, underflow, inf, nan, xnan, ynan, znan,
module round(v, sticky, FrmE, wsign,
FmaFlagsM, inf, nan, xnan, ynan, znan,
xman, yman, zman,
wman, infinity, specialsel,expplus1);
/////////////////////////////////////////////////////////////////////////////
input [53:0] v; // normalized sum, R, S bits
input sticky; //sticky bit
input rz; // Round toward zero
input rn; // Round toward nearest
input rp; // Round toward plus infinity
input rm; // Round toward minus infinity
input [2:0] FrmE;
input wsign; // Sign of result
input invalid; // Trap on infinity, NaN, denorm
input overflow; // Result overflowed
input underflow; // Result underflowed
input [4:0] FmaFlagsM;
input inf; // Some input is infinity
input nan; // Some input is NaN
input xnan; // X is NaN
@ -45,7 +40,7 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
// Internal nodes
wire plus1; // Round by adding one
logic plus1; // Round by adding one
wire [52:0] v1; // Result + 1 (for rounding)
wire [51:0] specialres; // Result of exceptional case
wire [51:0] infinityres; // Infinity or largest real number
@ -62,9 +57,19 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
// 0xx - do nothing
// 100 - tie - plus1 if v[2] = 1
// 101/110/111 - plus1
assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
(rp & ~wsign) |
(rm & wsign);
always @ (FrmE, v, wsign, sticky) begin
case (FrmE)
3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
3'b001: plus1 = 0;//round to zero
3'b010: plus1 = wsign;//round down
3'b011: plus1 = ~wsign;//round up
3'b100: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&~wsign)));//round to nearest max magnitude
default: plus1 = 1'bx;
endcase
end
// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
// (rp & ~wsign) |
// (rm & wsign);
//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
// rp && ~wsign && (v[1] || v[0]) ||
// rm && wsign && (v[1] || v[0]);
@ -84,17 +89,17 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
// inputs to the wide muxes can be combined at the expense of more
// complicated non-critical control in the circuit implementation.
assign specialsel = overflow || underflow || invalid ||
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
nan || inf;
assign specialres = invalid | nan ? nanres : //KEP added nan
overflow ? infinityres :
assign specialres = FmaFlagsM[4] | nan ? nanres : //invalid
FmaFlagsM[2] ? infinityres : //overflow
inf ? 52'b0 :
underflow ? 52'b0 : 52'bx; // default to undefined
FmaFlagsM[1] ? 52'b0 : 52'bx; // underflow
// Overflow is handled differently for different rounding modes
// Round is to either infinity or to maximum finite number
assign infinity = rn || (rp && ~wsign) || (rm && wsign);
assign infinity = |FrmE;//rn || (rp && ~wsign) || (rm && wsign);//***look into this
assign infinityres = infinity ? 52'b0 : {52{1'b1}};
// Invalid operations produce a quiet NaN. The result should

View File

@ -10,23 +10,24 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm, overflow,
sumzero, nan, invalid, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign);
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, FrmE, FmaFlagsM, zzero,
sumzero, nan, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign, isAdd);
////////////////////////////////////////////////////////////////////////////I
input xsign; // Sign of X
input ysign; // Sign of Y
input zsign; // Sign of Z
input zzero;
input isAdd;
input negsum0; // Sum in +O mode is negative
input negsum1; // Sum in +1 mode is negative
input bs; // sticky bit from addend
input ps; // sticky bit from product
input killprod; // Product forced to zero
input rm; // Round toward minus infinity
input overflow; // Round toward minus infinity
input [2:0] FrmE; // Rounding mode (3'b010 = round toward minus infinity)
input [4:0] FmaFlagsM; // Status flags: [4] invalid, [3] divide by zero, [2] overflow, [1] underflow, [0] inexact
input sumzero; // Sum = O
input nan; // Some input is NaN
input invalid; // Result invalid
input xinf; // X = Inf
input yinf; // Y = Inf
input zinf; // Z = Inf
@ -96,10 +97,24 @@ logic tmp;
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
// sum/difference shall be -0. However, x+x = x-(-X) retains the same sign as x even when x is zero."
assign zerosign = (~invz && killprod) ? zsign : rm;
//assign zerosign = (~invz && killprod) ? zsign : rm;//***look into
// assign zerosign = (~invz && killprod) ? zsign : 0;
// zero sign
// if product underflows then use psign
// otherwise
// addition
// if cancelation then 0 unless round to -inf
// otherwise psign
// subtraction
// if cancelation then 0 unless round to -inf
// otherwise psign
assign zerosign = FmaFlagsM[1] ? psign :
(isAdd ? (psign^zsign ? FrmE == 3'b010 : psign) :
(psign^zsign ? psign : FrmE == 3'b010));
assign infsign = zinf ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
//assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
assign tmp = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
assign wsign = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
assign tmp = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
endmodule

View File

@ -10,49 +10,49 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module special(x, y, z, ae, xzero, yzero, zzero,
module special(ReadData1E, ReadData2E, ReadData3E, ae, xzero, yzero, zzero,
xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, xinf, yinf, zinf);
/////////////////////////////////////////////////////////////////////////////
input [63:0] x; // Input x
input [63:0] y; // Input Y
input [63:0] z; // Input z
input [63:0] ReadData1E; // Input ReadData1E
input [63:0] ReadData2E; // Input ReadData2E
input [63:0] ReadData3E; // Input ReadData3E
input [12:0] ae; // exponent of product
output xzero; // Input x = 0
output yzero; // Input y = 0
output zzero; // Input z = 0
output xnan; // x is NaN
output ynan; // y is NaN
output znan; // z is NaN
output xdenorm; // x is denormalized
output ydenorm; // y is denormalized
output zdenorm; // z is denormalized
output xzero; // Input ReadData1E = 0
output yzero; // Input ReadData2E = 0
output zzero; // Input ReadData3E = 0
output xnan; // ReadData1E is NaN
output ynan; // ReadData2E is NaN
output znan; // ReadData3E is NaN
output xdenorm; // ReadData1E is denormalized
output ydenorm; // ReadData2E is denormalized
output zdenorm; // ReadData3E is denormalized
output proddenorm; // product is denormalized
output xinf; // x is infinity
output yinf; // y is infinity
output zinf; // z is infinity
output xinf; // ReadData1E is infinity
output yinf; // ReadData2E is infinity
output zinf; // ReadData3E is infinity
// In the actual circuit design, the gates looking at bits
// 51:0 and at bits 62:52 should be shared among the various detectors.
// Check if input is NaN
assign xnan = &x[62:52] && |x[51:0];
assign ynan = &y[62:52] && |y[51:0];
assign znan = &z[62:52] && |z[51:0];
assign xnan = &ReadData1E[62:52] && |ReadData1E[51:0];
assign ynan = &ReadData2E[62:52] && |ReadData2E[51:0];
assign znan = &ReadData3E[62:52] && |ReadData3E[51:0];
// Check if input is denormalized
assign xdenorm = ~(|x[62:52]) && |x[51:0];
assign ydenorm = ~(|y[62:52]) && |y[51:0];
assign zdenorm = ~(|z[62:52]) && |z[51:0];
assign xdenorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0];
assign ydenorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0];
assign zdenorm = ~(|ReadData3E[62:52]) && |ReadData3E[51:0];
assign proddenorm = &ae & ~xzero & ~yzero; //KEP is the product denormalized
// Check if input is infinity
assign xinf = &x[62:52] && ~(|x[51:0]);
assign yinf = &y[62:52] && ~(|y[51:0]);
assign zinf = &z[62:52] && ~(|z[51:0]);
assign xinf = &ReadData1E[62:52] && ~(|ReadData1E[51:0]);
assign yinf = &ReadData2E[62:52] && ~(|ReadData2E[51:0]);
assign zinf = &ReadData3E[62:52] && ~(|ReadData3E[51:0]);
// Check if inputs are all zero
// Also forces denormalized inputs to zero.
@ -60,11 +60,11 @@ module special(x, y, z, ae, xzero, yzero, zzero,
// to just check if the exponent is zero.
// KATHERINE - commented following (21/01/11)
// assign xzero = ~(|x[62:0]) || xdenorm;
// assign yzero = ~(|y[62:0]) || ydenorm;
// assign zzero = ~(|z[62:0]) || zdenorm;
// assign xzero = ~(|ReadData1E[62:0]) || xdenorm;
// assign yzero = ~(|ReadData2E[62:0]) || ydenorm;
// assign zzero = ~(|ReadData3E[62:0]) || zdenorm;
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
assign xzero = ~(|x[62:0]);
assign yzero = ~(|y[62:0]);
assign zzero = ~(|z[62:0]);
assign xzero = ~(|ReadData1E[62:0]);
assign yzero = ~(|ReadData2E[62:0]);
assign zzero = ~(|ReadData3E[62:0]);
endmodule

View File

@ -1,16 +1 @@
0010000000000000 bf4fdffffff7fffe 800ffffffffffffe 800003fbfffffefe 801003fbfffffefe Wrong zdenorm 308227
0010000000000000 be6fffffbffffff7 8000000000000000 800000001fffffc0 800000000fffffe0 Wrong 313753
001ffffffffffffe 3fddfbffffffffff 000ffffffffffffe 000efdfffffffffd 001efdfffffffffd Wrong zdenorm 551371
3befe000ffffffff 800ffffffffffffe 0000000000000000 0000000000000000 8000000000000000 Wrong ydenorm unflw 665575
000007fffffffffe 3f6ffffffe01fffe 000ffffffffffffe 00000007ffffff7e 00100007ffffff7e Wrong xdenorm zdenorm 768727
3fdffffffffffffe 000ffffffffffffe 8000000000000001 7feffffffffffff6 0007fffffffffffe Wrong ydenorm zdenorm 1049939
7fe0000000000001 4000000000000000 ffefffffffffffff 7ff0000000000000 7cb8000000000000 Wrong w=+inf 2602745
000fff000000000f 3ff00800001fffff 8010000000000000 7f7bfe007ff8381e 000006ff801ffe0e Wrong xdenorm 3117277
8000000000000001 40211275ffe5ee3c 0000000000000001 fcfe24ebffcbdc78 8000000000000008 Wrong xdenorm zdenorm 3148591
801fffffffffffff bfdffffffffffffe 0000000000021fff 0000000000021ffe 0010000000021ffe Wrong zdenorm 3537867
801ffffffffffffe 0010000000000001 0000000000000000 0000000000000000 8000000000000000 Wrong unflw 3564269
bca0000000000001 000fffffc000001e 8000000000000000 8000000000000001 8000000000000000 Wrong ydenorm 3717769
bcafffffffffffff 800ffffffffffffe 8000000000000000 0000000000000002 0000000000000001 Wrong ydenorm 3807413
7fec5fed92358a74 400000001bffffff ffefc0003ffffffe 7ff0000000000000 7fe8ffdb47bad466 Wrong w=+inf 3889689
bfdfffffffffffff 3fdf1f3616aa73e1 3fd0000000000001 3fd07064f4aac611 3f7c193d2ab1843f Wrong 4099063
3fd07dfffffffffe 8010000000000001 0000000000000001 ffe07dfffffffffb 80041f7fffffffff Wrong zdenorm 4716133
c3f000200003fffe 0000000000000001 001ffffffffffffe 80cffc400007fffd 80cffc400007fffc Wrong FmaResultM= -64 ydenorm 1119653

Binary file not shown.

View File

@ -20,19 +20,19 @@ void main() {
// b68ffff8000000ff_3f9080000007ffff_b6307ffbe0080080_00001
char ch;
int i,j,n;
char x[17];
char y[17];
char z[17];
char ReadData1E[17];
char ReadData2E[17];
char ReadData3E[17];
char ans[81];
char flags[3];
int rn,rz,rm,rp;
long stop = 4099063;
int FrmE;
long stop = 1119653;
int debug = 1;
//my_string = (char *) malloc (nbytes + 1);
//bytes_read = getline (&my_string, &nbytes, stdin);
for(n=0; n < 613; n++) {//613 for 10000
for(n=0; n < 305; n++) {//613 for 10000
if(getline(&ln,&nbytes,fp) < 0 || feof(fp)) break;
if(k == stop && debug == 1) break;
k++;
@ -41,71 +41,59 @@ void main() {
if(!feof(fp)) {
strncpy(x, ln, 16); x[16]=0;
strncpy(y, &ln[17], 16); y[16]=0;
strncpy(z, &ln[34], 16); z[16]=0;
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
strncpy(ReadData1E, ln, 16); ReadData1E[16]=0;
strncpy(ReadData2E, &ln[17], 16); ReadData2E[16]=0;
strncpy(ReadData3E, &ln[34], 16); ReadData3E[16]=0;
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
strncpy(ans, &ln[51], 16); ans[16]=0;
strncpy(flags,&ln[68],2); flags[2]=0;
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
fprintf(fq," x = 64'h%s;\n",x);
fprintf(fq," y = 64'h%s;\n",y);
fprintf(fq," z = 64'h%s;\n",z);
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
fprintf(fq," ReadData1E = 64'h%s;\n",ReadData1E);
fprintf(fq," ReadData2E = 64'h%s;\n",ReadData2E);
fprintf(fq," ReadData3E = 64'h%s;\n",ReadData3E);
fprintf(fq," ans = 64'h%s;\n", ans);
// fprintf(fq," flags = 5'h%s;\n", flags);
{
//rn=1; rz=0; rm=0; rp=0;
fprintf(fq," rn = %d;\n",1);
fprintf(fq," rz = %d;\n", 0);
fprintf(fq," rm = %d;\n", 0);
fprintf(fq," rp = %d;\n", 0);
}
{
fprintf(fq," earlyres = 64'b0;\n");
fprintf(fq," earlyressel = 0;\n");
}
{
fprintf(fq," bypsel= 2'b0;\n"); //, bysel);
fprintf(fq," bypplus1 = 0;\n"); //, byp1);
fprintf(fq," byppostnorm = 0;\n"); //, bypnorm);
fprintf(fq," FrmE = 3'b000;\n");
}
fprintf(fq,"#10\n");
// IEEE 754-2008 section 6.3 states "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",x,y,w, ans);\n");
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",ReadData1E,ReadData2E,FmaResultM, ans);\n");
fprintf(fq," // IEEE 754-2008 section 6.3 states: \"When ether an input or result is NaN, this\n");
fprintf(fq," // standard does not interpret the sign of a NaN.\"\n");
fprintf(fq," wnan = &w[62:52] && |w[51:0]; \n");
fprintf(fq," xnan = &x[62:52] && |x[51:0]; \n");
fprintf(fq," ynan = &y[62:52] && |y[51:0]; \n");
fprintf(fq," znan = &z[62:52] && |z[51:0]; \n");
fprintf(fq," wnan = &FmaResultM[62:52] && |FmaResultM[51:0]; \n");
fprintf(fq," xnan = &ReadData1E[62:52] && |ReadData1E[51:0]; \n");
fprintf(fq," ynan = &ReadData2E[62:52] && |ReadData2E[51:0]; \n");
fprintf(fq," znan = &ReadData3E[62:52] && |ReadData3E[51:0]; \n");
fprintf(fq," ansnan = &ans[62:52] && |ans[51:0]; \n");
fprintf(fq," xnorm = ~(|x[62:52]) && |x[51:0] ? {x[50:0], 1'b0} : x; \n");
fprintf(fq," ynorm = ~(|y[62:52]) && |y[51:0] ? {y[50:0], 1'b0} : y;\n");
fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
// fprintf(fq," if(!(~(|x[62:52]) && |x[51:0] || ~(|y[62:52]) && |y[51:0])) begin\n");
fprintf(fq," xnorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0] ? {ReadData1E[50:0], 1'b0} : ReadData1E; \n");
fprintf(fq," ynorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0] ? {ReadData2E[50:0], 1'b0} : ReadData2E;\n");
// fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
// fprintf(fq," if(!(~(|ReadData1E[62:52]) && |ReadData1E[51:0] || ~(|ReadData2E[62:52]) && |ReadData2E[51:0])) begin\n");
// not looking at negative zero results right now
//fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) && !(w == 64'h8000000000000000 && ans == 64'b0)) begin\n");
// fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) ) begin\n");
fprintf(fq," if((!wnan && (w != ans)) || (wnan && ansnan && ~(((xnan && (w[62:0] == {x[62:52],1'b1,x[50:0]})) || (ynan && (w[62:0] == {y[62:52],1'b1,y[50:0]})) || (znan && (w[62:0] == {z[62:52],1'b1,z[50:0]})) || (w[62:0] == ans[62:0])) ))) begin\n");
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",x,y, z, w, ans);\n");
//fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) && !(FmaResultM == 64'h8000000000000000 && ans == 64'b0)) begin\n");
// fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) ) begin\n");
fprintf(fq," if((!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {ReadData1E[62:52],1'b1,ReadData1E[50:0]})) || (ynan && (FmaResultM[62:0] == {ReadData2E[62:52],1'b1,ReadData2E[50:0]})) || (znan && (FmaResultM[62:0] == {ReadData3E[62:52],1'b1,ReadData3E[50:0]})) || (FmaResultM[62:0] == ans[62:0])) ))) begin\n");
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",ReadData1E,ReadData2E, ReadData3E, FmaResultM, ans);\n");
//fprintf(fq," $fwrite(fp, \"%%h \",s);\n");
fprintf(fq," if(w == 64'h8000000000000000) $fwrite(fp, \"w=-zero \");\n");
fprintf(fq," if(~(|x[62:52]) && |x[51:0]) $fwrite(fp, \"xdenorm \");\n");
fprintf(fq," if(~(|y[62:52]) && |y[51:0]) $fwrite(fp, \"ydenorm \");\n");
fprintf(fq," if(~(|z[62:52]) && |z[51:0]) $fwrite(fp, \"zdenorm \");\n");
fprintf(fq," if(invalid != 0) $fwrite(fp, \"invld \");\n");
fprintf(fq," if(overflow != 0) $fwrite(fp, \"ovrflw \");\n");
fprintf(fq," if(underflow != 0) $fwrite(fp, \"unflw \");\n");
fprintf(fq," if(w == 64'hFFF0000000000000) $fwrite(fp, \"w=-inf \");\n");
fprintf(fq," if(w == 64'h7FF0000000000000) $fwrite(fp, \"w=+inf \");\n");
fprintf(fq," if(w > 64'h7FF0000000000000 && w < 64'h7FF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
fprintf(fq," if(w > 64'hFFF8000000000000 && w < 64'hFFF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
fprintf(fq," if(w >= 64'h7FF8000000000000 && w <= 64'h7FFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
fprintf(fq," if(w >= 64'hFFF8000000000000 && w <= 64'hFFFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
fprintf(fq," $fwrite(fp, \"FmaResultM=%%d \",$signed(aligncnt));\n");
fprintf(fq," if(FmaResultM == 64'h8000000000000000) $fwrite(fp, \"FmaResultM=-zero \");\n");
fprintf(fq," if(~(|ReadData1E[62:52]) && |ReadData1E[51:0]) $fwrite(fp, \"xdenorm \");\n");
fprintf(fq," if(~(|ReadData2E[62:52]) && |ReadData2E[51:0]) $fwrite(fp, \"ydenorm \");\n");
fprintf(fq," if(~(|ReadData3E[62:52]) && |ReadData3E[51:0]) $fwrite(fp, \"zdenorm \");\n");
fprintf(fq," if(FmaFlagsM[4] != 0) $fwrite(fp, \"invld \");\n");
fprintf(fq," if(FmaFlagsM[2] != 0) $fwrite(fp, \"ovrflw \");\n");
fprintf(fq," if(FmaFlagsM[1] != 0) $fwrite(fp, \"unflw \");\n");
fprintf(fq," if(FmaResultM == 64'hFFF0000000000000) $fwrite(fp, \"FmaResultM=-inf \");\n");
fprintf(fq," if(FmaResultM == 64'h7FF0000000000000) $fwrite(fp, \"FmaResultM=+inf \");\n");
fprintf(fq," if(FmaResultM > 64'h7FF0000000000000 && FmaResultM < 64'h7FF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
fprintf(fq," if(FmaResultM > 64'hFFF8000000000000 && FmaResultM < 64'hFFF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
fprintf(fq," if(FmaResultM >= 64'h7FF8000000000000 && FmaResultM <= 64'h7FFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
fprintf(fq," if(FmaResultM >= 64'hFFF8000000000000 && FmaResultM <= 64'hFFFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
fprintf(fq," if(ans == 64'hFFF0000000000000) $fwrite(fp, \"ans=-inf \");\n");
fprintf(fq," if(ans == 64'h7FF0000000000000) $fwrite(fp, \"ans=+inf \");\n");

File diff suppressed because it is too large Load Diff

View File

@ -2,38 +2,27 @@
module tb;
reg [63:0] x;
reg [63:0] y;
reg [63:0] z;
reg [63:0] ans;
reg rn;
reg rz;
reg rm;
reg rp;
reg [63:0] earlyres;
reg earlyressel;
reg [1:0] bypsel;
reg bypplus1;
reg byppostnorm;
wire [63:0] w;
wire [63:0] wbypass;
wire invalid;
wire overflow;
wire underflow;
wire inexact;
reg [63:0] ReadData1E;
reg [63:0] ReadData2E;
reg [63:0] ReadData3E;
reg [63:0] ans;
reg [2:0] FrmE;
wire [63:0] FmaResultM;
wire [4:0] FmaFlagsM;
integer fp;
reg wnan;
reg xnan;
reg ynan;
reg znan;
wire [12:0] aligncnt;
reg ansnan;
reg [105:0] s; // partial product 2
reg [51:0] xnorm;
reg [51:0] ynorm;
localparam period = 20;
fmac UUT(.*);
fma UUT(.*);
initial

View File

@ -1 +1 @@
testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat
testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat

View File

@ -50,7 +50,7 @@ module FA_array (S, C, A, B, Ci) ;
genvar i;
generate
for (i = 0; i < n; i = i + 1) begin : index
fa FA1(.S(S[i]), .C(C[i]), .A(A[i]), .B(B[i]), .Ci(Ci[i]));
fa FA1(.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
end
endgenerate