forked from Github_Repos/cvw
Various bugs fixed in FMA
This commit is contained in:
parent
03bb37a849
commit
ef011496a7
@ -48,7 +48,7 @@ module add(r, s, t, sum,
|
||||
|
||||
// Compound adder
|
||||
// Consists of 3:2 CSA followed by long compound CPA
|
||||
assign prodshifted = killprod ? 0 : {56'b0, r2, 2'b0} + {56'b0, s2, 2'b0};
|
||||
assign prodshifted = killprod ? 0 : {56'b0, r2+s2, 2'b0};
|
||||
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
|
||||
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above
|
||||
|
||||
|
@ -56,7 +56,7 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
|
||||
// addend on right shifts. Handle special cases of shifting
|
||||
// by too much.
|
||||
|
||||
always @(aligncnt or zman or zdenorm)
|
||||
always @(aligncnt or xzero or yzero or zman or zdenorm or zzero)
|
||||
begin
|
||||
|
||||
// Default to clearing sticky bits
|
||||
@ -67,26 +67,23 @@ module align(zman, ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t, bs
|
||||
killprod = xzero | yzero;
|
||||
// d = aligncnt
|
||||
// p = 53
|
||||
if ($signed(aligncnt) <= $signed(-103)) begin //d<=-2p+1
|
||||
if ($signed(aligncnt) <= $signed(-105)) begin //d<=-2p+1
|
||||
//product ancored case with saturated shift
|
||||
sumshift = 163; // 3p+4
|
||||
sumshiftzero = 0;
|
||||
shift = {~zdenorm,zman,163'b0} >> sumshift;
|
||||
shift = {1'b1,zman,163'b0} >> sumshift;
|
||||
t = zzero ? 0 : {shift[215:52]};
|
||||
bs = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if($signed(aligncnt) <= $signed(1)) begin // -2p+1<d<=2
|
||||
// set d<=2 to d<=0
|
||||
end else if($signed(aligncnt) <= $signed(2)) begin // -2p+1<d<=2
|
||||
// product ancored or cancellation
|
||||
// warning: set to 55 rather then 56. was there a typo in the book?
|
||||
sumshift = 57-aligncnt; // p + 3 - d
|
||||
sumshift = 57-aligncnt; // p + 2 - d
|
||||
sumshiftzero = 0;
|
||||
shift = {~zdenorm,zman,163'b0} >> sumshift;
|
||||
t = zzero ? 0 : {shift[215:52]};
|
||||
bs = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if ($signed(aligncnt)<=$signed(55)) begin // 2 < d <= p+2
|
||||
// another typo in book? above was 55 changed to 52
|
||||
// addend ancored case
|
||||
// used to be 56 \/ somthing doesn't seem right too many typos
|
||||
sumshift = 57-aligncnt;
|
||||
|
55
wally-pipelined/src/fpu/FMA/booth.sv
Normal file
55
wally-pipelined/src/fpu/FMA/booth.sv
Normal file
@ -0,0 +1,55 @@
|
||||
module booth(xExt, choose, add1, e, pp);
/////////////////////////////////////////////////////////////////////////////
// Radix-4 Booth selector for a single partial product.
//
// `choose` is a 3-bit window that selects which multiple of the
// multiplicand `xExt` is emitted:
//
//   choose | multiple | pp             | e | add1
//   -------+----------+----------------+---+------
//    000   |    0     | all zeros      | 0 |  00
//    001   |   +1     | {1'b0, xExt}   | 0 |  00
//    010   |   +1     | {1'b0, xExt}   | 0 |  00
//    011   |   +2     | {xExt, 1'b0}   | 0 |  00
//    100   |   -2     | {~xExt, 1'b0}  | 1 |  10
//    101   |   -1     | {1'b1, ~xExt}  | 1 |  01
//    110   |   -1     | {1'b1, ~xExt}  | 1 |  01
//    111   |   -0     | all ones       | 1 |  01
//
// Negative multiples are emitted in inverted form; pp + add1 (mod 2^55)
// equals the two's-complement value of the selected multiple.  This module
// does not perform that addition itself.
/////////////////////////////////////////////////////////////////////////////

  input  [53:0] xExt;    // multiplicand xExt
  input  [2:0]  choose;  // bits needed to choose which encoding
  output [1:0]  add1;    // correction to add to pp for negative multiples
  output        e;       // 1 when a negative multiple (-2, -1, -0) is selected
  output [54:0] pp;      // the resultant encoding

  logic [54:0] pp;
  logic        e;
  logic [1:0]  add1;
  logic [53:0] negx;     // bitwise complement of the multiplicand

  assign negx = ~xExt;

  // One combinational table drives all three outputs so that pp, e and add1
  // can never disagree about which multiple was selected.  All eight values
  // of `choose` are listed, so every output is assigned on every path.
  always_comb
    case (choose)
      3'b000 : begin pp = 55'b0;         e = 1'b0; add1 = 2'b00; end //  0
      3'b001 : begin pp = {1'b0, xExt};  e = 1'b0; add1 = 2'b00; end //  1
      3'b010 : begin pp = {1'b0, xExt};  e = 1'b0; add1 = 2'b00; end //  1
      3'b011 : begin pp = {xExt, 1'b0};  e = 1'b0; add1 = 2'b00; end //  2
      3'b100 : begin pp = {negx, 1'b0};  e = 1'b1; add1 = 2'b10; end // -2
      3'b101 : begin pp = {1'b1, negx};  e = 1'b1; add1 = 2'b01; end // -1
      3'b110 : begin pp = {1'b1, negx};  e = 1'b1; add1 = 2'b01; end // -1
      // Exactly 55 ones: a hex literal cannot express a 55-bit all-ones
      // value without being truncated, so use a replication instead.
      3'b111 : begin pp = {55{1'b1}};    e = 1'b1; add1 = 2'b01; end // -0
    endcase

endmodule
|
90
wally-pipelined/src/fpu/FMA/compressors.sv
Normal file
90
wally-pipelined/src/fpu/FMA/compressors.sv
Normal file
@ -0,0 +1,90 @@
|
||||
module add3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 3:2 carry-save stage built from independent one-bit
// sng3comp2 cells, one per column.  No carry travels between columns:
// carry[k] is the carry produced by column k and is NOT shifted here.
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////

  parameter BITS = 4;

  input  [BITS-1:0] a, b, c;      // the three operands
  output [BITS-1:0] carry, sum;   // per-column carry and sum vectors

  // One compressor cell per bit position.
  for (genvar col = 0; col < BITS; col = col + 1) begin
    sng3comp2 add0(.a(a[col]), .b(b[col]), .c(c[col]),
                   .carry(carry[col]), .sum(sum[col]));
  end

endmodule
|
||||
|
||||
module add4comp2(a, b, c, d, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// BITS-wide 4:2 carry-save stage built from one-bit sng4comp2 cells.
//
// Each column passes an intermediate carry (cout) sideways to the next
// more significant column; column 0 receives a constant 0.  The outputs
// satisfy
//
//     a + b + c + d == sum + 2*carry
//
// carry is BITS+1 bits wide so that the identity holds without overflow.
//
// Requires BITS >= 2: column 0 and column BITS-1 are instantiated
// separately and cout[BITS-2] is indexed directly.
/////////////////////////////////////////////////////////////////////////////

  parameter BITS = 4;

  input  [BITS-1:0] a;
  input  [BITS-1:0] b;
  input  [BITS-1:0] c;
  input  [BITS-1:0] d;
  output [BITS:0]   carry;
  output [BITS-1:0] sum;

  logic [BITS-1:0] cout;      // sideways carry out of each column
  logic            carryTmp;  // carry output of the most significant column
  genvar i;

  // Least significant column: nothing to carry in.
  sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);

  // Interior columns: chain cout[i-1] into column i.
  generate
    for (i = 1; i < BITS-1; i = i+1) begin
      sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
    end
  endgenerate

  // Most significant column: its carry is held back in carryTmp so it can
  // be merged with the final sideways carry below.
  sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);

  // carryTmp and cout[BITS-1] both carry weight 2^BITS, i.e. both belong in
  // carry[BITS-1].  Merge them with a half adder: the XOR stays at
  // carry[BITS-1] and the AND moves up to carry[BITS].  (The two were
  // previously swapped, which broke a+b+c+d == sum + 2*carry whenever the
  // top column generated a carry.)
  assign carry[BITS-1] = carryTmp ^ cout[BITS-1];
  assign carry[BITS]   = carryTmp & cout[BITS-1];

endmodule
|
||||
|
||||
module sng3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// One-bit 3:2 compressor (full-adder cell): for 0/1 inputs,
// {carry, sum} == a + b + c.
// TODO: look into different implementations of the compressors?
/////////////////////////////////////////////////////////////////////////////

  input  a, b, c;
  output carry, sum;

  logic abDiffer;   // 1 when a and b are different

  assign abDiffer = a ^ b;

  // If a and b differ, exactly one of them is set, so a carry is produced
  // only when c is also set.  If they agree, the carry equals a (== b).
  assign carry = abDiffer ? c : a;

  assign sum = abDiffer ^ c;

endmodule
|
||||
|
||||
module sng4comp2(a, b, c, d, cin, cout, carry, sum);
/////////////////////////////////////////////////////////////////////////////
// One-bit 4:2 compressor made of two cascaded sng3comp2 cells.
//   Stage 1 compresses a, b, c; its carry leaves the cell as cout.
//   Stage 2 compresses the stage-1 sum with d and cin, giving carry and sum.
// cout is a function of a, b and c only; cin does not feed into it.
// TODO: look into pass gate 4:2 counters?
/////////////////////////////////////////////////////////////////////////////

  input  a, b, c, d;
  input  cin;
  output cout;
  output carry;
  output sum;

  logic stage1Sum;   // sum bit handed from the first cell to the second

  sng3comp2 add1(.a(a), .b(b), .c(c),
                 .carry(cout), .sum(stage1Sum));

  sng3comp2 add2(.a(stage1Sum), .b(d), .c(cin),
                 .carry(carry), .sum(sum));

endmodule
|
@ -17,7 +17,7 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen(xexp, yexp, zexp,
|
||||
killprod, sumzero, resultdenorm, normcnt, infinity,
|
||||
invalid, overflow, underflow, inf, xzero, yzero,expplus1,
|
||||
FmaFlagsM, inf, xzero, yzero,expplus1,
|
||||
nan, de0, xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, specialsel, zexpsel,
|
||||
aligncnt, wexp,
|
||||
prodof, sumof, sumuf, denorm0, ae);
|
||||
@ -31,9 +31,7 @@ module expgen(xexp, yexp, zexp,
|
||||
input resultdenorm; // postnormalize rounded result
|
||||
input [8:0] normcnt; // normalization shift count
|
||||
input infinity; // generate infinity on overflow
|
||||
input invalid; // Result invalid
|
||||
input overflow; // Result overflowed
|
||||
input underflow; // Result underflowed
|
||||
input [4:0] FmaFlagsM; // Result invalid
|
||||
input inf; // Some input is infinity
|
||||
input nan; // Some input is NaN
|
||||
input [12:0] de0; // X is NaN NaN
|
||||
@ -121,10 +119,10 @@ module expgen(xexp, yexp, zexp,
|
||||
// produces either infinity or the largest finite number, depending on the
|
||||
// rounding mode. NaNs are propagated or generated.
|
||||
|
||||
assign specialres = invalid | nan ? nanres : // KEP added nan
|
||||
overflow ? infinityres :
|
||||
assign specialres = FmaFlagsM[4] | nan ? nanres : // invalid
|
||||
FmaFlagsM[2] ? infinityres : //overflow
|
||||
inf ? 11'b11111111111 :
|
||||
underflow ? 11'b0 : 11'bx;
|
||||
FmaFlagsM[1] ? 11'b0 : 11'bx; //underflow
|
||||
|
||||
assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;
|
||||
|
||||
|
@ -10,12 +10,13 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
|
||||
psign, zsign, xzero, yzero, zzero, vbits, killprod,
|
||||
inf, nan, invalid, overflow, underflow, inexact);
|
||||
inf, nan, FmaFlagsM,sticky);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input xnan; // X is NaN
|
||||
input ynan; // Y is NaN
|
||||
input znan; // Z is NaN
|
||||
input sticky; // X is Inf
|
||||
input xinf; // X is Inf
|
||||
input yinf; // Y is Inf
|
||||
input zinf; // Z is Inf
|
||||
@ -31,10 +32,7 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
|
||||
input [1:0] vbits; // R and S bits of result
|
||||
output inf; // Some source is Inf
|
||||
output nan; // Some source is NaN
|
||||
output invalid; // Result is invalid
|
||||
output overflow; // Result overflowed
|
||||
output underflow; // Result underflowed
|
||||
output inexact; // Result is not an exact number
|
||||
output [4:0] FmaFlagsM;
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -55,33 +53,36 @@ module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
|
||||
|
||||
assign prodinf = prodof && ~xnan && ~ynan;
|
||||
//KEP added if the product is infinity then sum is infinity
|
||||
assign suminf = prodinf | sumof && ~xnan && ~ynan && ~znan;
|
||||
assign suminf = sumof && ~xnan && ~ynan && ~znan;
|
||||
|
||||
// Set invalid flag for following cases:
|
||||
// 1) Inf - Inf
|
||||
// 2) 0 * Inf
|
||||
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
|
||||
|
||||
assign invalid = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
|
||||
assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
|
||||
xzero && yinf || yzero && xinf;// KEP remove case 3) above
|
||||
|
||||
assign FmaFlagsM[3] = 0; // divide by zero flag
|
||||
|
||||
|
||||
// Set the overflow flag for the following cases:
|
||||
// 1) Rounded multiply result would be out of bounds
|
||||
// 2) Rounded add result would be out of bounds
|
||||
|
||||
assign overflow = suminf && ~inf;
|
||||
assign FmaFlagsM[2] = suminf && ~inf;
|
||||
|
||||
// Set the underflow flag for the following cases:
|
||||
// 1) Any input is denormalized
|
||||
// 2) Output would be denormalized or smaller
|
||||
|
||||
assign underflow = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
|
||||
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) || (killprod & zzero & ~(yzero | xzero));
|
||||
|
||||
// Set the inexact flag for the following cases:
|
||||
// 1) Multiplication inexact
|
||||
// 2) Addition inexact
|
||||
// One of these cases occurred if the R or S bit is set
|
||||
|
||||
assign inexact = (vbits[0] || vbits[1] || suminf) && ~(inf || nan);
|
||||
assign FmaFlagsM[0] = (vbits[0] || vbits[1] ||sticky || suminf) && ~(inf || nan);
|
||||
|
||||
endmodule
|
||||
|
@ -15,13 +15,13 @@
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to X or Z inputs
|
||||
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes W=X*Y+Z, rounded with the mode specified by
|
||||
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the X or Z inputs for use on the next cycle. In addition, four signals
|
||||
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
@ -29,29 +29,17 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fmac(x, y, z, rn, rz, rp, rm,
|
||||
earlyres, earlyressel, bypsel, bypplus1, byppostnorm,
|
||||
w, wbypass, invalid, overflow, underflow, inexact);
|
||||
module fma(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
FmaResultM, FmaFlagsM, aligncnt);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [63:0] x; // input X from reg file
|
||||
input [63:0] y; // input Y
|
||||
input [63:0] z; // input Z from reg file
|
||||
input rn; // Round to Nearest
|
||||
input rz; // Round toward zero
|
||||
input rm; // Round toward minus infinity
|
||||
input rp; // Round toward plus infinity
|
||||
input [63:0] earlyres; // Early result from other FP logic
|
||||
input earlyressel; // Select early result, not W
|
||||
input [1:0] bypsel; // Select W bypass to X, or z
|
||||
input bypplus1; // Add one in bypass
|
||||
input byppostnorm; // postnormalize in bypass
|
||||
output [63:0] w; // output W=X*Y+Z
|
||||
output [63:0] wbypass; // prerounded output W=X*Y+Z for bypass
|
||||
output invalid; // Result is invalid
|
||||
output overflow; // Result overflowed
|
||||
output underflow; // Result underflowed
|
||||
output inexact; // Result is not an exact number
|
||||
input [63:0] ReadData1E; // input 1
|
||||
input [63:0] ReadData2E; // input 2
|
||||
input [63:0] ReadData3E; // input 3
|
||||
input [2:0] FrmE; // Rounding mode
|
||||
output [63:0] FmaResultM; // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
|
||||
output [4:0] FmaFlagsM; // status flags
|
||||
output [12:0] aligncnt; // status flags
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -60,12 +48,12 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
logic [163:0] t; // output of alignment shifter
|
||||
logic [163:0] sum; // output of carry prop adder
|
||||
logic [53:0] v; // normalized sum, R, S bits
|
||||
logic [12:0] aligncnt; // shift count for alignment
|
||||
// logic [12:0] aligncnt; // shift count for alignment
|
||||
logic [8:0] normcnt; // shift count for normalizer
|
||||
logic [12:0] ae; // multiplier expoent
|
||||
logic bs; // sticky bit of addend
|
||||
logic ps; // sticky bit of product
|
||||
logic killprod; // Z >> product
|
||||
logic killprod; // ReadData3E >> product
|
||||
logic negsum; // negate sum
|
||||
logic invz; // invert addend
|
||||
logic selsum1; // select +1 mode of sum
|
||||
@ -73,7 +61,7 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
logic negsum1; // sum +1 < 0
|
||||
logic sumzero; // sum = 0
|
||||
logic infinity; // generate infinity on overflow
|
||||
logic prodof; // X*Y out of range
|
||||
logic prodof; // ReadData1E*ReadData2E out of range
|
||||
logic sumof; // result out of range
|
||||
logic xzero;
|
||||
logic yzero;
|
||||
@ -101,6 +89,9 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
logic [8:0] sumshift;
|
||||
logic sumshiftzero;
|
||||
logic [12:0] de0;
|
||||
logic isAdd;
|
||||
|
||||
assign isAdd = 1;
|
||||
|
||||
|
||||
|
||||
@ -117,16 +108,16 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
|
||||
// Instantiate fraction datapath
|
||||
|
||||
multiply multiply(.xman(x[51:0]), .yman(y[51:0]), .*);
|
||||
align align(.zman(z[51:0]),.*);
|
||||
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
|
||||
align align(.zman(ReadData3E[51:0]),.*);
|
||||
add add(.*);
|
||||
lza lza(.*);
|
||||
normalize normalize(.zexp(z[62:52]),.*);
|
||||
round round(.xman(x[51:0]), .yman(y[51:0]),.zman(z[51:0]), .wman(w[51:0]),.wsign(w[63]),.*);
|
||||
normalize normalize(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*);
|
||||
round round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]),.zman(ReadData3E[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
|
||||
|
||||
// Instantiate exponent datapath
|
||||
|
||||
expgen expgen(.xexp(x[62:52]),.yexp(y[62:52]),.zexp(z[62:52]),.wexp(w[62:52]),.*);
|
||||
expgen expgen(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.wexp(FmaResultM[62:52]),.*);
|
||||
// Instantiate special case detection across datapath & exponent path
|
||||
|
||||
special special(.*);
|
||||
@ -134,8 +125,8 @@ module fmac(x, y, z, rn, rz, rp, rm,
|
||||
|
||||
// Instantiate control logic
|
||||
|
||||
sign sign(.xsign(x[63]),.ysign(y[63]),.zsign(z[63]),.wsign(w[63]),.*);
|
||||
flag flag(.zsign(z[63]),.vbits(v[1:0]),.*);
|
||||
sign sign(.xsign(ReadData1E[63]),.ysign(ReadData2E[63]),.zsign(ReadData3E[63]),.wsign(FmaResultM[63]),.*);
|
||||
flag flag(.zsign(ReadData3E[63]),.vbits(v[1:0]),.*);
|
||||
|
||||
endmodule
|
||||
|
@ -30,7 +30,7 @@ module lza(sum, normcnt, sumzero);
|
||||
always @ ( sum)
|
||||
begin
|
||||
i = 0;
|
||||
while (~sum[108-i] && i < 108) i = i+1; // search for leading one
|
||||
while (~sum[163-i] && i <= 163) i = i+1; // search for leading one
|
||||
normcnt = i; // compute shift count
|
||||
end
|
||||
|
||||
|
@ -10,8 +10,124 @@ module multiply(xman, yman, xdenorm, ydenorm, xzero, yzero, r, s);
|
||||
input yzero; // Z is denorm
|
||||
output [105:0] r; // partial product 1
|
||||
output [105:0] s; // partial product 2
|
||||
|
||||
wire [54:0] yExt; //y with appended 0 and assumed 1
|
||||
wire [53:0] xExt; //y with assumed 1
|
||||
wire [26:0][1:0] add1;
|
||||
wire [26:0][54:0] pp;
|
||||
wire [26:0] e;
|
||||
logic [17:0][105:0] lv1add;
|
||||
logic [11:0][105:0] lv2add;
|
||||
logic [7:0][105:0] lv3add;
|
||||
logic [3:0][105:0] lv4add;
|
||||
logic [21:0][106:0] carryTmp;
|
||||
wire [26:0][105:0] acc;
|
||||
// wire [105:0] acc
|
||||
genvar i;
|
||||
|
||||
assign r = 106'b0;
|
||||
assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
|
||||
assign xExt = {2'b0,~(xdenorm|xzero),xman};
|
||||
assign yExt = {2'b0,~(ydenorm|yzero),yman, 1'b0};
|
||||
|
||||
generate
|
||||
for(i=0; i<27; i=i+1) begin
|
||||
booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
|
||||
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]};
|
||||
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
|
||||
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
|
||||
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
|
||||
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
|
||||
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
|
||||
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
|
||||
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
|
||||
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
|
||||
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
|
||||
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
|
||||
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
|
||||
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
|
||||
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
|
||||
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
|
||||
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
|
||||
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
|
||||
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
|
||||
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
|
||||
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
|
||||
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
|
||||
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
|
||||
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
|
||||
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
|
||||
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
|
||||
assign acc[26] = {pp[26],add1[25], 50'b0};
|
||||
|
||||
//*** resize adders
|
||||
generate
|
||||
for(i=0; i<9; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
|
||||
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
|
||||
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<6; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
|
||||
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
|
||||
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<4; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
|
||||
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
|
||||
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
generate
|
||||
for(i=0; i<2; i=i+1) begin
|
||||
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
|
||||
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
|
||||
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
|
||||
.carry(carryTmp[21]), .sum(s));
|
||||
assign r = {carryTmp[21][104:0], 1'b0};
|
||||
// assign r = 0;
|
||||
// assign s = acc[0] +
|
||||
// acc[1] +
|
||||
// acc[2] +
|
||||
// acc[3] +
|
||||
// acc[4] +
|
||||
// acc[5] +
|
||||
// acc[6] +
|
||||
// acc[7] +
|
||||
// acc[8] +
|
||||
// acc[9] +
|
||||
// acc[10] +
|
||||
// acc[11] +
|
||||
// acc[12] +
|
||||
// acc[13] +
|
||||
// acc[14] +
|
||||
// acc[15] +
|
||||
// acc[16] +
|
||||
// acc[17] +
|
||||
// acc[18] +
|
||||
// acc[19] +
|
||||
// acc[20] +
|
||||
// acc[21] +
|
||||
// acc[22] +
|
||||
// acc[23] +
|
||||
// acc[24] +
|
||||
// acc[25] +
|
||||
// acc[26];
|
||||
|
||||
// assign s = {53'b0,~(xdenorm|xzero),xman} * {53'b0,~(ydenorm|yzero),yman};
|
||||
// assign r = 0;
|
||||
endmodule
|
||||
|
@ -14,9 +14,11 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
|
||||
module normalize(sum, xexp, yexp, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero, sumzero, xzero, zzero, yzero, bs, ps, denorm0, xdenorm, ydenorm, zdenorm, sticky, de0, resultdenorm, v);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
input [163:0] sum; // sum
|
||||
input [62:52] xexp; // sum
|
||||
input [62:52] yexp; // sum
|
||||
input [62:52] zexp; // sum
|
||||
input [8:0] normcnt; // normalization shift count
|
||||
input [12:0] ae; // normalization shift count
|
||||
@ -33,6 +35,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
|
||||
input zdenorm; // Input Z is denormalized
|
||||
input xzero;
|
||||
input yzero;
|
||||
input zzero;
|
||||
output sticky; //sticky bit
|
||||
output [12:0] de0;
|
||||
output resultdenorm; // Input Z is denormalized
|
||||
@ -47,6 +50,7 @@ module normalize(sum, zexp, invz, normcnt, ae, aligncnt, sumshift, sumshiftzero,
|
||||
logic [9:0] sumshifttmp;
|
||||
logic [163:0] sumshiftedtmp; // shifted sum
|
||||
logic sticky;
|
||||
logic isShiftLeft1;
|
||||
logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
|
||||
// When the sum is zero, normalization does not apply and only the
|
||||
@ -60,21 +64,23 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
// The sticky bit calculation is actually built into the shifter and
|
||||
// does not require a true subtraction shown in the model.
|
||||
|
||||
assign isShiftLeft1 = (aligncnt == 1 ||aligncnt == 0 || $signed(aligncnt) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
|
||||
assign tmp = ($signed(ae-normcnt+2) >= $signed(-1022));
|
||||
always @(sum or sumshift or ae or aligncnt or normcnt or bs or zexp or zdenorm)
|
||||
always @(sum or sumshift or ae or aligncnt or normcnt or bs or isShiftLeft1 or zexp or zdenorm)
|
||||
begin
|
||||
// d = aligncnt
|
||||
// l = normcnt
|
||||
// p = 53
|
||||
// ea + eb = ae
|
||||
// set d<=2 to d<=0
|
||||
if ($signed(aligncnt)<=$signed(1)) begin //d<=2
|
||||
if ($signed(aligncnt)<=$signed(2)) begin //d<=2
|
||||
// product anchored or cancellation
|
||||
if ($signed(ae-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
|
||||
//normal result
|
||||
de0 = xzero|yzero ? zexp : ae-normcnt+2+xdenorm+ydenorm;
|
||||
resultdenorm = |sum & ~|de0;
|
||||
sumshifted = resultdenorm ? sum << sumshift : sum << (55+normcnt); // p+2+l
|
||||
de0 = xzero|yzero ? zexp : ae-normcnt+xdenorm+ydenorm+57;
|
||||
resultdenorm = |sum & ~|de0 | de0[12];
|
||||
// if z is zero then there was a 56 bit shift of the product
|
||||
sumshifted = resultdenorm ? sum << sumshift-zzero+isShiftLeft1 : sum << normcnt; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bs;
|
||||
//de0 = ae-normcnt+2-1023;
|
||||
@ -90,8 +96,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
sumshifttmp = {1'b0,sumshift} - 2;
|
||||
sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
|
||||
tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
|
||||
tmp2 = (sumshifttmp[9] || sumshifted[162]);
|
||||
tmp3 = sumshifted[161];
|
||||
tmp2 = ((sumshifttmp[9] & sumshift[0]) || sumshifted[162]);
|
||||
tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshift[1]));
|
||||
tmp4 = sumshifted[160];
|
||||
tmp5 = sumshifted[159];
|
||||
// for some reason use exp = zexp + {0,1,2}
|
||||
@ -112,25 +118,31 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp;
|
||||
end else if(sumshifted[160]) begin
|
||||
v = sumshifted[159:106];
|
||||
de0 = zexp+zdenorm;
|
||||
end else if(sumshifted[160]& ~zdenorm) begin
|
||||
de0 = zexp-1;
|
||||
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
|
||||
sticky = (|sumshifted[105:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp-1;
|
||||
end else if(sumshifted[159]) begin
|
||||
v = sumshifted[158:105];
|
||||
end else if(sumshifted[159]& ~zdenorm) begin
|
||||
//v = sumshifted[158:105];
|
||||
de0 = zexp-2;
|
||||
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
|
||||
sticky = (|sumshifted[104:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp-2;
|
||||
end else begin
|
||||
end else if(zdenorm) begin
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bs;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp;
|
||||
end else begin
|
||||
de0 = 0;
|
||||
sumshifted = sum << sumshift-1; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bs;
|
||||
end
|
||||
|
||||
resultdenorm = ~(|de0);
|
||||
resultdenorm = (~|de0 | de0[12]);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -13,22 +13,17 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
invalid, overflow, underflow, inf, nan, xnan, ynan, znan,
|
||||
module round(v, sticky, FrmE, wsign,
|
||||
FmaFlagsM, inf, nan, xnan, ynan, znan,
|
||||
xman, yman, zman,
|
||||
wman, infinity, specialsel,expplus1);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [53:0] v; // normalized sum, R, S bits
|
||||
input sticky; //sticky bit
|
||||
input rz; // Round toward zero
|
||||
input rn; // Round toward nearest
|
||||
input rp; // Round toward plus infinity
|
||||
input rm; // Round toward minus infinity
|
||||
input [2:0] FrmE;
|
||||
input wsign; // Sign of result
|
||||
input invalid; // Trap on infinity, NaN, denorm
|
||||
input overflow; // Result overflowed
|
||||
input underflow; // Result underflowed
|
||||
input [4:0] FmaFlagsM;
|
||||
input inf; // Some input is infinity
|
||||
input nan; // Some input is NaN
|
||||
input xnan; // X is NaN
|
||||
@ -45,7 +40,7 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
|
||||
// Internal nodes
|
||||
|
||||
wire plus1; // Round by adding one
|
||||
logic plus1; // Round by adding one
|
||||
wire [52:0] v1; // Result + 1 (for rounding)
|
||||
wire [51:0] specialres; // Result of exceptional case
|
||||
wire [51:0] infinityres; // Infinity or largest real number
|
||||
@ -62,9 +57,19 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - plus1 if v[2] = 1
|
||||
// 101/110/111 - plus1
|
||||
assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
|
||||
(rp & ~wsign) |
|
||||
(rm & wsign);
|
||||
always @ (FrmE, v, wsign, sticky) begin
|
||||
case (FrmE)
|
||||
3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
|
||||
3'b001: plus1 = 0;//round to zero
|
||||
3'b010: plus1 = wsign;//round down
|
||||
3'b011: plus1 = ~wsign;//round up
|
||||
3'b100: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&~wsign)));//round to nearest max magnitude
|
||||
default: plus1 = 1'bx;
|
||||
endcase
|
||||
end
|
||||
// assign plus1 = (rn & v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))) |
|
||||
// (rp & ~wsign) |
|
||||
// (rm & wsign);
|
||||
//assign plus1 = rn && ((v[1] && v[0]) || (v[2] && (v[1]))) ||
|
||||
// rp && ~wsign && (v[1] || v[0]) ||
|
||||
// rm && wsign && (v[1] || v[0]);
|
||||
@ -84,17 +89,17 @@ module round(v, sticky, rz, rn, rp, rm, wsign,
|
||||
// inputs to the wide muxes can be combined at the expense of more
|
||||
// complicated non-critical control in the circuit implementation.
|
||||
|
||||
assign specialsel = overflow || underflow || invalid ||
|
||||
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
|
||||
nan || inf;
|
||||
assign specialres = invalid | nan ? nanres : //KEP added nan
|
||||
overflow ? infinityres :
|
||||
assign specialres = FmaFlagsM[4] | nan ? nanres : //invalid
|
||||
FmaFlagsM[2] ? infinityres : //overflow
|
||||
inf ? 52'b0 :
|
||||
underflow ? 52'b0 : 52'bx; // default to undefined
|
||||
FmaFlagsM[1] ? 52'b0 : 52'bx; // underflow
|
||||
|
||||
// Overflow is handled differently for different rounding modes
|
||||
// Round is to either infinity or to maximum finite number
|
||||
|
||||
assign infinity = rn || (rp && ~wsign) || (rm && wsign);
|
||||
assign infinity = |FrmE;//rn || (rp && ~wsign) || (rm && wsign);//***look into this
|
||||
assign infinityres = infinity ? 52'b0 : {52{1'b1}};
|
||||
|
||||
// Invalid operations produce a quiet NaN. The result should
|
||||
|
@ -10,23 +10,24 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm, overflow,
|
||||
sumzero, nan, invalid, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign);
|
||||
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, FrmE, FmaFlagsM, zzero,
|
||||
sumzero, nan, xinf, yinf, zinf, inf, wsign, invz, negsum, selsum1, psign, isAdd);
|
||||
////////////////////////////////////////////////////////////////////////////I
|
||||
|
||||
input xsign; // Sign of X
|
||||
input ysign; // Sign of Y
|
||||
input zsign; // Sign of Z
|
||||
input zzero;
|
||||
input isAdd;
|
||||
input negsum0; // Sum in +0 mode is negative
|
||||
input negsum1; // Sum in +1 mode is negative
|
||||
input bs; // sticky bit from addend
|
||||
input ps; // sticky bit from product
|
||||
input killprod; // Product forced to zero
|
||||
input rm; // Round toward minus infinity
|
||||
input overflow; // Round toward minus infinity
|
||||
input [2:0] FrmE; // Rounding mode (3'b010 = round toward minus infinity)
|
||||
input [4:0] FmaFlagsM; // Exception flags: [4] invalid, [2] overflow, [1] underflow
|
||||
input sumzero; // Sum = 0
|
||||
input nan; // Some input is NaN
|
||||
input invalid; // Result invalid
|
||||
input xinf; // X = Inf
|
||||
input yinf; // Y = Inf
|
||||
input zinf; // Z = Inf
|
||||
@ -96,10 +97,24 @@ logic tmp;
|
||||
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
|
||||
// sum/difference shall be -0. However, x+x = x-(-X) retains the same sign as x even when x is zero."
|
||||
|
||||
assign zerosign = (~invz && killprod) ? zsign : rm;
|
||||
//assign zerosign = (~invz && killprod) ? zsign : rm;//***look into
|
||||
// assign zerosign = (~invz && killprod) ? zsign : 0;
|
||||
// zero sign
|
||||
// if product underflows then use psign
|
||||
// otherwise
|
||||
// addition
|
||||
// if cancelation then 0 unless round to -inf
|
||||
// otherwise psign
|
||||
// subtraction
|
||||
// if cancelation then 0 unless round to -inf
|
||||
// otherwise psign
|
||||
|
||||
assign zerosign = FmaFlagsM[1] ? psign :
|
||||
(isAdd ? (psign^zsign ? FrmE == 3'b010 : psign) :
|
||||
(psign^zsign ? psign : FrmE == 3'b010));
|
||||
assign infsign = zinf ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
|
||||
//assign infsign = xinf ? (yinf ? psign : xsign) : yinf ? ysign : zsign;//original
|
||||
assign tmp = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
|
||||
assign wsign = invalid ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
|
||||
assign tmp = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : psign ^ negsum));
|
||||
assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
|
||||
|
||||
endmodule
|
||||
|
@ -10,49 +10,49 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module special(x, y, z, ae, xzero, yzero, zzero,
|
||||
module special(ReadData1E, ReadData2E, ReadData3E, ae, xzero, yzero, zzero,
|
||||
xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, xinf, yinf, zinf);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [63:0] x; // Input x
|
||||
input [63:0] y; // Input Y
|
||||
input [63:0] z; // Input z
|
||||
input [63:0] ReadData1E; // Input ReadData1E
|
||||
input [63:0] ReadData2E; // Input ReadData2E
|
||||
input [63:0] ReadData3E; // Input ReadData3E
|
||||
input [12:0] ae; // exponent of product
|
||||
output xzero; // Input x = 0
|
||||
output yzero; // Input y = 0
|
||||
output zzero; // Input z = 0
|
||||
output xnan; // x is NaN
|
||||
output ynan; // y is NaN
|
||||
output znan; // z is NaN
|
||||
output xdenorm; // x is denormalized
|
||||
output ydenorm; // y is denormalized
|
||||
output zdenorm; // z is denormalized
|
||||
output xzero; // Input ReadData1E = 0
|
||||
output yzero; // Input ReadData2E = 0
|
||||
output zzero; // Input ReadData3E = 0
|
||||
output xnan; // ReadData1E is NaN
|
||||
output ynan; // ReadData2E is NaN
|
||||
output znan; // ReadData3E is NaN
|
||||
output xdenorm; // ReadData1E is denormalized
|
||||
output ydenorm; // ReadData2E is denormalized
|
||||
output zdenorm; // ReadData3E is denormalized
|
||||
output proddenorm; // product is denormalized
|
||||
output xinf; // x is infinity
|
||||
output yinf; // y is infinity
|
||||
output zinf; // z is infinity
|
||||
output xinf; // ReadData1E is infinity
|
||||
output yinf; // ReadData2E is infinity
|
||||
output zinf; // ReadData3E is infinity
|
||||
|
||||
// In the actual circuit design, the gates looking at bits
|
||||
// 51:0 and at bits 62:52 should be shared among the various detectors.
|
||||
|
||||
// Check if input is NaN
|
||||
|
||||
assign xnan = &x[62:52] && |x[51:0];
|
||||
assign ynan = &y[62:52] && |y[51:0];
|
||||
assign znan = &z[62:52] && |z[51:0];
|
||||
assign xnan = &ReadData1E[62:52] && |ReadData1E[51:0];
|
||||
assign ynan = &ReadData2E[62:52] && |ReadData2E[51:0];
|
||||
assign znan = &ReadData3E[62:52] && |ReadData3E[51:0];
|
||||
|
||||
// Check if input is denormalized
|
||||
|
||||
assign xdenorm = ~(|x[62:52]) && |x[51:0];
|
||||
assign ydenorm = ~(|y[62:52]) && |y[51:0];
|
||||
assign zdenorm = ~(|z[62:52]) && |z[51:0];
|
||||
assign xdenorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0];
|
||||
assign ydenorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0];
|
||||
assign zdenorm = ~(|ReadData3E[62:52]) && |ReadData3E[51:0];
|
||||
assign proddenorm = &ae & ~xzero & ~yzero; //KEP is the product denormalized
|
||||
|
||||
// Check if input is infinity
|
||||
|
||||
assign xinf = &x[62:52] && ~(|x[51:0]);
|
||||
assign yinf = &y[62:52] && ~(|y[51:0]);
|
||||
assign zinf = &z[62:52] && ~(|z[51:0]);
|
||||
assign xinf = &ReadData1E[62:52] && ~(|ReadData1E[51:0]);
|
||||
assign yinf = &ReadData2E[62:52] && ~(|ReadData2E[51:0]);
|
||||
assign zinf = &ReadData3E[62:52] && ~(|ReadData3E[51:0]);
|
||||
|
||||
// Check if inputs are all zero
|
||||
// Also forces denormalized inputs to zero.
|
||||
@ -60,11 +60,11 @@ module special(x, y, z, ae, xzero, yzero, zzero,
|
||||
// to just check if the exponent is zero.
|
||||
|
||||
// KATHERINE - commented following (21/01/11)
|
||||
// assign xzero = ~(|x[62:0]) || xdenorm;
|
||||
// assign yzero = ~(|y[62:0]) || ydenorm;
|
||||
// assign zzero = ~(|z[62:0]) || zdenorm;
|
||||
// assign xzero = ~(|ReadData1E[62:0]) || xdenorm;
|
||||
// assign yzero = ~(|ReadData2E[62:0]) || ydenorm;
|
||||
// assign zzero = ~(|ReadData3E[62:0]) || zdenorm;
|
||||
// KATHERINE - removed denorm to prevent outputing zero when computing with a denormalized number
|
||||
assign xzero = ~(|x[62:0]);
|
||||
assign yzero = ~(|y[62:0]);
|
||||
assign zzero = ~(|z[62:0]);
|
||||
assign xzero = ~(|ReadData1E[62:0]);
|
||||
assign yzero = ~(|ReadData2E[62:0]);
|
||||
assign zzero = ~(|ReadData3E[62:0]);
|
||||
endmodule
|
||||
|
@ -1,16 +1 @@
|
||||
0010000000000000 bf4fdffffff7fffe 800ffffffffffffe 800003fbfffffefe 801003fbfffffefe Wrong zdenorm 308227
|
||||
0010000000000000 be6fffffbffffff7 8000000000000000 800000001fffffc0 800000000fffffe0 Wrong 313753
|
||||
001ffffffffffffe 3fddfbffffffffff 000ffffffffffffe 000efdfffffffffd 001efdfffffffffd Wrong zdenorm 551371
|
||||
3befe000ffffffff 800ffffffffffffe 0000000000000000 0000000000000000 8000000000000000 Wrong ydenorm unflw 665575
|
||||
000007fffffffffe 3f6ffffffe01fffe 000ffffffffffffe 00000007ffffff7e 00100007ffffff7e Wrong xdenorm zdenorm 768727
|
||||
3fdffffffffffffe 000ffffffffffffe 8000000000000001 7feffffffffffff6 0007fffffffffffe Wrong ydenorm zdenorm 1049939
|
||||
7fe0000000000001 4000000000000000 ffefffffffffffff 7ff0000000000000 7cb8000000000000 Wrong w=+inf 2602745
|
||||
000fff000000000f 3ff00800001fffff 8010000000000000 7f7bfe007ff8381e 000006ff801ffe0e Wrong xdenorm 3117277
|
||||
8000000000000001 40211275ffe5ee3c 0000000000000001 fcfe24ebffcbdc78 8000000000000008 Wrong xdenorm zdenorm 3148591
|
||||
801fffffffffffff bfdffffffffffffe 0000000000021fff 0000000000021ffe 0010000000021ffe Wrong zdenorm 3537867
|
||||
801ffffffffffffe 0010000000000001 0000000000000000 0000000000000000 8000000000000000 Wrong unflw 3564269
|
||||
bca0000000000001 000fffffc000001e 8000000000000000 8000000000000001 8000000000000000 Wrong ydenorm 3717769
|
||||
bcafffffffffffff 800ffffffffffffe 8000000000000000 0000000000000002 0000000000000001 Wrong ydenorm 3807413
|
||||
7fec5fed92358a74 400000001bffffff ffefc0003ffffffe 7ff0000000000000 7fe8ffdb47bad466 Wrong w=+inf 3889689
|
||||
bfdfffffffffffff 3fdf1f3616aa73e1 3fd0000000000001 3fd07064f4aac611 3f7c193d2ab1843f Wrong 4099063
|
||||
3fd07dfffffffffe 8010000000000001 0000000000000001 ffe07dfffffffffb 80041f7fffffffff Wrong zdenorm 4716133
|
||||
c3f000200003fffe 0000000000000001 001ffffffffffffe 80cffc400007fffd 80cffc400007fffc Wrong FmaResultM= -64 ydenorm 1119653
|
||||
|
Binary file not shown.
@ -20,19 +20,19 @@ void main() {
|
||||
// b68ffff8000000ff_3f9080000007ffff_b6307ffbe0080080_00001
|
||||
char ch;
|
||||
int i,j,n;
|
||||
char x[17];
|
||||
char y[17];
|
||||
char z[17];
|
||||
char ReadData1E[17];
|
||||
char ReadData2E[17];
|
||||
char ReadData3E[17];
|
||||
char ans[81];
|
||||
char flags[3];
|
||||
int rn,rz,rm,rp;
|
||||
long stop = 4099063;
|
||||
int FrmE;
|
||||
long stop = 1119653;
|
||||
int debug = 1;
|
||||
//my_string = (char *) malloc (nbytes + 1);
|
||||
//bytes_read = getline (&my_string, &nbytes, stdin);
|
||||
|
||||
|
||||
for(n=0; n < 613; n++) {//613 for 10000
|
||||
for(n=0; n < 305; n++) {//613 for 10000
|
||||
if(getline(&ln,&nbytes,fp) < 0 || feof(fp)) break;
|
||||
if(k == stop && debug == 1) break;
|
||||
k++;
|
||||
@ -41,71 +41,59 @@ void main() {
|
||||
|
||||
if(!feof(fp)) {
|
||||
|
||||
strncpy(x, ln, 16); x[16]=0;
|
||||
strncpy(y, &ln[17], 16); y[16]=0;
|
||||
strncpy(z, &ln[34], 16); z[16]=0;
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
|
||||
strncpy(ReadData1E, ln, 16); ReadData1E[16]=0;
|
||||
strncpy(ReadData2E, &ln[17], 16); ReadData2E[16]=0;
|
||||
strncpy(ReadData3E, &ln[34], 16); ReadData3E[16]=0;
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
|
||||
strncpy(ans, &ln[51], 16); ans[16]=0;
|
||||
strncpy(flags,&ln[68],2); flags[2]=0;
|
||||
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,z);
|
||||
fprintf(fq," x = 64'h%s;\n",x);
|
||||
fprintf(fq," y = 64'h%s;\n",y);
|
||||
fprintf(fq," z = 64'h%s;\n",z);
|
||||
// fprintf(stdout,"[%s]\n[%s]\n", ln,ReadData3E);
|
||||
fprintf(fq," ReadData1E = 64'h%s;\n",ReadData1E);
|
||||
fprintf(fq," ReadData2E = 64'h%s;\n",ReadData2E);
|
||||
fprintf(fq," ReadData3E = 64'h%s;\n",ReadData3E);
|
||||
fprintf(fq," ans = 64'h%s;\n", ans);
|
||||
// fprintf(fq," flags = 5'h%s;\n", flags);
|
||||
|
||||
|
||||
{
|
||||
//rn=1; rz=0; rm=0; rp=0;
|
||||
fprintf(fq," rn = %d;\n",1);
|
||||
fprintf(fq," rz = %d;\n", 0);
|
||||
fprintf(fq," rm = %d;\n", 0);
|
||||
fprintf(fq," rp = %d;\n", 0);
|
||||
}
|
||||
{
|
||||
fprintf(fq," earlyres = 64'b0;\n");
|
||||
fprintf(fq," earlyressel = 0;\n");
|
||||
}
|
||||
{
|
||||
|
||||
fprintf(fq," bypsel= 2'b0;\n"); //, bysel);
|
||||
fprintf(fq," bypplus1 = 0;\n"); //, byp1);
|
||||
fprintf(fq," byppostnorm = 0;\n"); //, bypnorm);
|
||||
fprintf(fq," FrmE = 3'b000;\n");
|
||||
}
|
||||
fprintf(fq,"#10\n");
|
||||
// IEEE 754-2008 section 6.3 states "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
|
||||
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",x,y,w, ans);\n");
|
||||
//fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",ReadData1E,ReadData2E,FmaResultM, ans);\n");
|
||||
fprintf(fq," // IEEE 754-2008 section 6.3 states: \"When ether an input or result is NaN, this\n");
|
||||
fprintf(fq," // standard does not interpret the sign of a NaN.\"\n");
|
||||
fprintf(fq," wnan = &w[62:52] && |w[51:0]; \n");
|
||||
fprintf(fq," xnan = &x[62:52] && |x[51:0]; \n");
|
||||
fprintf(fq," ynan = &y[62:52] && |y[51:0]; \n");
|
||||
fprintf(fq," znan = &z[62:52] && |z[51:0]; \n");
|
||||
fprintf(fq," wnan = &FmaResultM[62:52] && |FmaResultM[51:0]; \n");
|
||||
fprintf(fq," xnan = &ReadData1E[62:52] && |ReadData1E[51:0]; \n");
|
||||
fprintf(fq," ynan = &ReadData2E[62:52] && |ReadData2E[51:0]; \n");
|
||||
fprintf(fq," znan = &ReadData3E[62:52] && |ReadData3E[51:0]; \n");
|
||||
fprintf(fq," ansnan = &ans[62:52] && |ans[51:0]; \n");
|
||||
fprintf(fq," xnorm = ~(|x[62:52]) && |x[51:0] ? {x[50:0], 1'b0} : x; \n");
|
||||
fprintf(fq," ynorm = ~(|y[62:52]) && |y[51:0] ? {y[50:0], 1'b0} : y;\n");
|
||||
fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
|
||||
// fprintf(fq," if(!(~(|x[62:52]) && |x[51:0] || ~(|y[62:52]) && |y[51:0])) begin\n");
|
||||
fprintf(fq," xnorm = ~(|ReadData1E[62:52]) && |ReadData1E[51:0] ? {ReadData1E[50:0], 1'b0} : ReadData1E; \n");
|
||||
fprintf(fq," ynorm = ~(|ReadData2E[62:52]) && |ReadData2E[51:0] ? {ReadData2E[50:0], 1'b0} : ReadData2E;\n");
|
||||
// fprintf(fq," s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm}; \n");
|
||||
// fprintf(fq," if(!(~(|ReadData1E[62:52]) && |ReadData1E[51:0] || ~(|ReadData2E[62:52]) && |ReadData2E[51:0])) begin\n");
|
||||
// not looking at negative zero results right now
|
||||
//fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) && !(w == 64'h8000000000000000 && ans == 64'b0)) begin\n");
|
||||
// fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) ) begin\n");
|
||||
fprintf(fq," if((!wnan && (w != ans)) || (wnan && ansnan && ~(((xnan && (w[62:0] == {x[62:52],1'b1,x[50:0]})) || (ynan && (w[62:0] == {y[62:52],1'b1,y[50:0]})) || (znan && (w[62:0] == {z[62:52],1'b1,z[50:0]})) || (w[62:0] == ans[62:0])) ))) begin\n");
|
||||
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",x,y, z, w, ans);\n");
|
||||
//fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) && !(FmaResultM == 64'h8000000000000000 && ans == 64'b0)) begin\n");
|
||||
// fprintf(fq," if( (nan && (FmaResultM[62:0] != ans[62:0])) || (!nan && (FmaResultM != ans)) ) begin\n");
|
||||
fprintf(fq," if((!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {ReadData1E[62:52],1'b1,ReadData1E[50:0]})) || (ynan && (FmaResultM[62:0] == {ReadData2E[62:52],1'b1,ReadData2E[50:0]})) || (znan && (FmaResultM[62:0] == {ReadData3E[62:52],1'b1,ReadData3E[50:0]})) || (FmaResultM[62:0] == ans[62:0])) ))) begin\n");
|
||||
fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",ReadData1E,ReadData2E, ReadData3E, FmaResultM, ans);\n");
|
||||
//fprintf(fq," $fwrite(fp, \"%%h \",s);\n");
|
||||
fprintf(fq," if(w == 64'h8000000000000000) $fwrite(fp, \"w=-zero \");\n");
|
||||
fprintf(fq," if(~(|x[62:52]) && |x[51:0]) $fwrite(fp, \"xdenorm \");\n");
|
||||
fprintf(fq," if(~(|y[62:52]) && |y[51:0]) $fwrite(fp, \"ydenorm \");\n");
|
||||
fprintf(fq," if(~(|z[62:52]) && |z[51:0]) $fwrite(fp, \"zdenorm \");\n");
|
||||
fprintf(fq," if(invalid != 0) $fwrite(fp, \"invld \");\n");
|
||||
fprintf(fq," if(overflow != 0) $fwrite(fp, \"ovrflw \");\n");
|
||||
fprintf(fq," if(underflow != 0) $fwrite(fp, \"unflw \");\n");
|
||||
fprintf(fq," if(w == 64'hFFF0000000000000) $fwrite(fp, \"w=-inf \");\n");
|
||||
fprintf(fq," if(w == 64'h7FF0000000000000) $fwrite(fp, \"w=+inf \");\n");
|
||||
fprintf(fq," if(w > 64'h7FF0000000000000 && w < 64'h7FF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
|
||||
fprintf(fq," if(w > 64'hFFF8000000000000 && w < 64'hFFF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
|
||||
fprintf(fq," if(w >= 64'h7FF8000000000000 && w <= 64'h7FFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
|
||||
fprintf(fq," if(w >= 64'hFFF8000000000000 && w <= 64'hFFFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
|
||||
fprintf(fq," $fwrite(fp, \"FmaResultM=%%d \",$signed(aligncnt));\n");
|
||||
fprintf(fq," if(FmaResultM == 64'h8000000000000000) $fwrite(fp, \"FmaResultM=-zero \");\n");
|
||||
fprintf(fq," if(~(|ReadData1E[62:52]) && |ReadData1E[51:0]) $fwrite(fp, \"xdenorm \");\n");
|
||||
fprintf(fq," if(~(|ReadData2E[62:52]) && |ReadData2E[51:0]) $fwrite(fp, \"ydenorm \");\n");
|
||||
fprintf(fq," if(~(|ReadData3E[62:52]) && |ReadData3E[51:0]) $fwrite(fp, \"zdenorm \");\n");
|
||||
fprintf(fq," if(FmaFlagsM[4] != 0) $fwrite(fp, \"invld \");\n");
|
||||
fprintf(fq," if(FmaFlagsM[2] != 0) $fwrite(fp, \"ovrflw \");\n");
|
||||
fprintf(fq," if(FmaFlagsM[1] != 0) $fwrite(fp, \"unflw \");\n");
|
||||
fprintf(fq," if(FmaResultM == 64'hFFF0000000000000) $fwrite(fp, \"FmaResultM=-inf \");\n");
|
||||
fprintf(fq," if(FmaResultM == 64'h7FF0000000000000) $fwrite(fp, \"FmaResultM=+inf \");\n");
|
||||
fprintf(fq," if(FmaResultM > 64'h7FF0000000000000 && FmaResultM < 64'h7FF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
|
||||
fprintf(fq," if(FmaResultM > 64'hFFF8000000000000 && FmaResultM < 64'hFFF8000000000000 ) $fwrite(fp, \"FmaResultM=sigNaN \");\n");
|
||||
fprintf(fq," if(FmaResultM >= 64'h7FF8000000000000 && FmaResultM <= 64'h7FFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
|
||||
fprintf(fq," if(FmaResultM >= 64'hFFF8000000000000 && FmaResultM <= 64'hFFFfffffffffffff ) $fwrite(fp, \"FmaResultM=qutNaN \");\n");
|
||||
|
||||
fprintf(fq," if(ans == 64'hFFF0000000000000) $fwrite(fp, \"ans=-inf \");\n");
|
||||
fprintf(fq," if(ans == 64'h7FF0000000000000) $fwrite(fp, \"ans=+inf \");\n");
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,38 +2,27 @@
|
||||
module tb;
|
||||
|
||||
|
||||
reg [63:0] x;
|
||||
reg [63:0] y;
|
||||
reg [63:0] z;
|
||||
reg [63:0] ans;
|
||||
reg rn;
|
||||
reg rz;
|
||||
reg rm;
|
||||
reg rp;
|
||||
reg [63:0] earlyres;
|
||||
reg earlyressel;
|
||||
reg [1:0] bypsel;
|
||||
reg bypplus1;
|
||||
reg byppostnorm;
|
||||
wire [63:0] w;
|
||||
wire [63:0] wbypass;
|
||||
wire invalid;
|
||||
wire overflow;
|
||||
wire underflow;
|
||||
wire inexact;
|
||||
reg [63:0] ReadData1E;
|
||||
reg [63:0] ReadData2E;
|
||||
reg [63:0] ReadData3E;
|
||||
reg [63:0] ans;
|
||||
reg [2:0] FrmE;
|
||||
wire [63:0] FmaResultM;
|
||||
wire [4:0] FmaFlagsM;
|
||||
|
||||
integer fp;
|
||||
reg wnan;
|
||||
reg xnan;
|
||||
reg ynan;
|
||||
reg znan;
|
||||
wire [12:0] aligncnt;
|
||||
reg ansnan;
|
||||
reg [105:0] s; // partial product 2
|
||||
reg [51:0] xnorm;
|
||||
reg [51:0] ynorm;
|
||||
|
||||
localparam period = 20;
|
||||
fmac UUT(.*);
|
||||
fma UUT(.*);
|
||||
|
||||
|
||||
initial
|
||||
|
@ -1 +1 @@
|
||||
testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat
|
||||
testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat
|
||||
|
@ -50,7 +50,7 @@ module FA_array (S, C, A, B, Ci) ;
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < n; i = i + 1) begin : index
|
||||
fa FA1(.S(S[i]), .C(C[i]), .A(A[i]), .B(B[i]), .Ci(Ci[i]));
|
||||
fa FA1(.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user