forked from Github_Repos/cvw
initial FMA push
This commit is contained in:
parent
64536dbc34
commit
7b103423e1
61
wally-pipelined/src/fpu/FMA/add.v
Normal file
61
wally-pipelined/src/fpu/FMA/add.v
Normal file
@ -0,0 +1,61 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Block Name: add.v
|
||||
// Author: David Harris
|
||||
// Date: 11/12/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the addition of the product and addend. It also
|
||||
// contains logic necessary to adjust the signs for effective subtracts
|
||||
// and negative results.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
module add(r[105:0], s[105:0], t[157:0], sum[157:0],
           negsum, invz, selsum1, killprod, negsum0, negsum1, proddenorm);
////////////////////////////////////////////////////////////////////////////////
//
// Adds the two partial-product vectors (r, s) to the aligned addend (t).
// Both a "+0" and a "+1" version of the sum are formed; the caller picks one
// with selsum1 and may request a bitwise inversion of the pick with negsum.
//
// NOTE(review): the addend is arithmetically negated (-t) when invz is set,
// although the port comment speaks of "inverting" it. Whether ~t was intended
// depends on how the sign logic drives selsum1/negsum - confirm against sign.v.
//
////////////////////////////////////////////////////////////////////////////////

  input  [105:0] r;          // partial product 1
  input  [105:0] s;          // partial product 2
  input  [157:0] t;          // aligned addend
  input          invz;       // invert addend
  input          selsum1;    // select +1 mode of compound adder
  input          killprod;   // z >> product
  input          negsum;     // negate sum
  input          proddenorm; // when set, the product is never zeroed out
  output [157:0] sum;        // selected (and possibly inverted) sum
  output         negsum0;    // MSB of the +0 sum
  output         negsum1;    // MSB of the +1 sum

  // Internal nodes
  wire         dropprod;     // product contributes nothing to the sum
  wire [105:0] r2;           // partial product 1 after optional zeroing
  wire [105:0] s2;           // partial product 2 after optional zeroing
  wire [157:0] t2;           // addend after optional negation
  wire [157:0] base;         // r2 + s2 + t2, common to both adder modes
  wire [157:0] sum0;         // compound adder result, +0 mode
  wire [157:0] sum1;         // compound adder result, +1 mode
  wire [157:0] picked;       // sum0 or sum1, before optional inversion

  // Addend conditioning
  assign t2 = invz ? -t : t;

  // The product is zeroed only when killprod is set and proddenorm is clear
  assign dropprod = ~proddenorm & killprod;
  assign r2 = dropprod ? 106'b0 : r;
  assign s2 = dropprod ? 106'b0 : s;

  // Compound adder (behavioral model of a 3:2 CSA followed by a compound CPA)
  assign base = {52'b0, r2} + {52'b0, s2} + t2;
  assign sum0 = base;
  assign sum1 = base + 158'b1;

  // Sign bits of both candidate sums
  assign negsum0 = sum0[157];
  assign negsum1 = sum1[157];

  // 4:1 result mux expressed as mode select followed by optional inversion
  assign picked = selsum1 ? sum1 : sum0;
  assign sum    = negsum ? ~picked : picked;

endmodule
|
99
wally-pipelined/src/fpu/FMA/align.v
Normal file
99
wally-pipelined/src/fpu/FMA/align.v
Normal file
@ -0,0 +1,99 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: align.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the alignment shifter. It is responsible for
|
||||
// adjusting the fraction portion of the addend relative to the fraction
|
||||
// produced in the multiplier array.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module align(z[51:0], ae[12:0], aligncnt, xzero, yzero, zzero, zdenorm, proddenorm, t[157:0], bs, ps,
             killprod, bypsel[1], bypplus1, byppostnorm);
/////////////////////////////////////////////////////////////////////////////
//
// Alignment shifter for the addend Z.
//
// The addend fraction (with a leading 1 prepended) is placed in a 158-bit
// field and shifted left or right by the signed count aligncnt. Bits of the
// fraction that fall off the bottom on a right shift are ORed into the
// sticky bit bs. When the shift would be too large in either direction, or
// an input is zero, fixed results are produced instead.
//
// Changes relative to the original model:
//   * The always block is sensitive to every signal it reads (the original
//     list omitted ae, so a change on ae alone was not re-evaluated).
//   * The sticky-bit loop is now reachable. The original guarded it with
//     "aligncnt < 0", which is never true for an unsigned 12-bit vector.
//   * The loop bound is derived from a 12-bit shift magnitude so that it is
//     not evaluated as a huge unsigned 32-bit number.
//
/////////////////////////////////////////////////////////////////////////////

  input  [51:0]  z;           // Fraction of addend z
  input  [12:0]  ae;          // 13-bit exponent (fmac drives this with the
                              // multiply exponent); only ae[12] is used here
  input  [11:0]  aligncnt;    // signed (two's complement) shift amount;
                              // bit 11 set means shift right
  input          xzero;       // Input X = 0
  input          yzero;       // Input Y = 0
  input          zzero;       // Input Z = 0
  input          zdenorm;     // Input Z = denorm (not used in this block)
  input          proddenorm;  // (not used in this block)
  input  [1:1]   bypsel;      // Select bypass to X or Z
  input          bypplus1;    // Add one to bypassed result
  input          byppostnorm; // Postnormalize bypassed result
  output [157:0] t;           // aligned addend (54 bits left of bpt)
  output         bs;          // sticky bit of addend
  output         ps;          // sticky bit of product
  output         killprod;    // Z >> product

  // Internal nodes

  reg  [157:0] t;             // aligned addend from shifter
  reg          killprod;      // Z >> product
  reg          bs;            // sticky bit of addend
  reg          ps;            // sticky bit of product
  integer      i;             // bit index for sticky computation
  wire [52:0]  z1;            // Z plus 1
  wire [51:0]  z2;            // Z selected after handling rounds
  wire [11:0]  align104;      // alignment count + 104
  wire [11:0]  rshift;        // magnitude of a right shift (= -aligncnt)
  wire [157:0] zfull;         // unshifted addend field

  // Increment fraction of Z by one if necessary for prerounded bypass
  // This incrementor delay is masked by the alignment count computation

  assign z1 = z + 1;
  assign z2 = bypsel[1] && bypplus1 ? (byppostnorm ? z1[52:1] : z1[51:0]) : z;

  // Compute sign of aligncnt + 104 to check for shifting too far right

  assign align104 = aligncnt + 104;

  // Two's complement of the count, kept at 12 bits so that a negative
  // count of -n yields exactly n
  assign rshift = -aligncnt;

  // Addend before shifting: leading one (unless Z is zero), 52 fraction
  // bits, and 52 zero bits below
  assign zfull = {53'b0, ~zzero, z2, 52'b0};

  // Shift addend by alignment count. Generate sticky bits from
  // addend on right shifts. Handle special cases of shifting
  // by too much.

  always @(*)
    begin

      // Default to clearing sticky bits
      bs = 0;
      ps = 0;

      // And to using product as primary operand in adder / exponent gen
      killprod = 0;

      if (zzero) begin
        t = 158'b0;
        if (xzero || yzero) killprod = 1;
      end else if ((aligncnt > 53 && ~aligncnt[11]) || xzero || yzero) begin
        // Left shift by huge amount
        // or product = 0
        t = zfull;
        killprod = 1;
        ps = ~xzero && ~yzero;
      end else if ((ae[12] && align104[11])) begin //***fix the if statement
        // KEP if the multiplier's exponent overflows
        t = zfull;
        killprod = 1;
        ps = ~xzero && ~yzero;
      end else if (align104[11]) begin // Right shift by huge amount
        bs = ~zzero;
        t = 0;
      end else if (~aligncnt[11]) begin // Left shift by reasonable amount
        t = zfull << aligncnt;
      end else begin // Otherwise right shift
        t = zfull >> rshift;

        // use some behavioral code to find sticky bit. This is really
        // done by hardware in the shifter.
        // z2[i] sits at bit 52+i of the unshifted field, so it is shifted
        // out exactly when 52+i < rshift.
        for (i = 0; i < 52; i = i + 1)
          if (i + 52 < rshift)
            bs = bs || z2[i];
      end
    end

endmodule
|
114
wally-pipelined/src/fpu/FMA/array.sv
Normal file
114
wally-pipelined/src/fpu/FMA/array.sv
Normal file
@ -0,0 +1,114 @@
|
||||
|
||||
module array(x, y, xdenorm, ydenorm, r, s, bypsel, bypplus1);
/////////////////////////////////////////////////////////////////////////////
//
// Multiplier array. 27 booth cells each turn a 3-bit window of the extended
// Y operand into a partial product of the extended X operand. The 27
// aligned rows are then reduced in carry-save form through four levels of
// add3comp2 / add4comp2 compressors plus a final add4comp2, leaving the two
// vectors r and s.
//
// bypsel and bypplus1 are accepted but not used by the current logic.
//
/////////////////////////////////////////////////////////////////////////////

    input  [51:0]  x;        // Fraction of multiplicand x
    input  [51:0]  y;        // Fraction of multiplicand y
    input          xdenorm;  // is x denormalized
    input          ydenorm;  // is y denormalized
    input          bypsel;   // Bypass X
    input          bypplus1; // Add 1 to X to handle rounding
    output [105:0] r;        // partial product 1
    output [105:0] s;        // partial product 2

    // Operand preparation
    wire [51:0] xnorm;       // x, shifted left by one when xdenorm is set
    wire [51:0] ynorm;       // y, shifted left by one when ydenorm is set
    wire [54:0] yExt;        // y with appended 0 and assumed 1
    wire [53:0] xExt;        // x with assumed 1

    // Booth cell outputs, one entry per cell
    wire [26:0][1:0]  add1;  // correction bits placed below the next row
    wire [26:0][54:0] pp;    // partial products
    wire [26:0]       e;     // per-row e output of the booth cell

    // Rows entering the compressor tree and the vectors between its levels
    wire  [26:0][105:0] acc;
    logic [17:0][105:0] lv1add;
    logic [11:0][105:0] lv2add;
    logic [7:0][105:0]  lv3add;
    logic [3:0][105:0]  lv4add;
    logic [21:0][106:0] carryTmp;  // raw carry outputs of all 22 compressors

    genvar i;

    assign xnorm = xdenorm ? {x[50:0], 1'b0} : x;  // normalization of denormalized numbers
    assign ynorm = ydenorm ? {y[50:0], 1'b0} : y;
    assign yExt  = {2'b01, ynorm, 1'b0};           // y extended and added assumed 1
    assign xExt  = {2'b01, xnorm};                 // x with added assumed 1

    // booth encoding: cell i looks at yExt[2i+2 : 2i]
    generate
        for (i = 0; i < 27; i = i + 1) begin
            booth booth(.xExt(xExt),
                        .choose(yExt[2*i +: 3]),
                        .add1(add1[i]),
                        .e(e[i]),
                        .pp(pp[i]));
        end
    endgenerate

    // Row alignment. Row i (i >= 1) carries the correction bits of row i-1
    // directly beneath its partial product and is padded with 2*(i-1) zeros.
    // Several concatenations are wider than 106 bits; the assignment keeps
    // the low 106 bits.
    assign acc[0]  = {49'b0,~e[0],e[0],e[0],pp[0]};
    assign acc[1]  = {50'b01,~e[1],pp[1],add1[0]};
    assign acc[2]  = {48'b01,~e[2],pp[2],add1[1], 2'b0};
    assign acc[3]  = {46'b01,~e[3],pp[3],add1[2], 4'b0};
    assign acc[4]  = {44'b01,~e[4],pp[4],add1[3], 6'b0};
    assign acc[5]  = {42'b01,~e[5],pp[5],add1[4], 8'b0};
    assign acc[6]  = {40'b01,~e[6],pp[6],add1[5], 10'b0};
    assign acc[7]  = {38'b01,~e[7],pp[7],add1[6], 12'b0};
    assign acc[8]  = {36'b01,~e[8],pp[8],add1[7], 14'b0};
    assign acc[9]  = {34'b01,~e[9],pp[9],add1[8], 16'b0};
    assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
    assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
    assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
    assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
    assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
    assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
    assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
    assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
    assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
    assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
    assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
    assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
    assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
    assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
    assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
    assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
    assign acc[26] = {pp[26],add1[25], 50'b0};

    //*** resize adders

    // Level 1: 27 rows -> 18 vectors (9 compressors, carryTmp[0..8]).
    // Each carry vector is shifted left by one before it is reused.
    generate
        for (i = 0; i < 9; i = i + 1) begin
            add3comp2 #(.BITS(106)) add1(.a(acc[3*i]),
                                         .b(acc[3*i+1]),
                                         .c(acc[3*i+2]),
                                         .carry(carryTmp[i][105:0]),
                                         .sum(lv1add[2*i+1]));
            assign lv1add[2*i] = {carryTmp[i][104:0], 1'b0};
        end
    endgenerate

    // Level 2: 18 -> 12 vectors (6 compressors, carryTmp[9..14])
    generate
        for (i = 0; i < 6; i = i + 1) begin
            add3comp2 #(.BITS(106)) add2(.a(lv1add[3*i]),
                                         .b(lv1add[3*i+1]),
                                         .c(lv1add[3*i+2]),
                                         .carry(carryTmp[i+9][105:0]),
                                         .sum(lv2add[2*i+1]));
            assign lv2add[2*i] = {carryTmp[i+9][104:0], 1'b0};
        end
    endgenerate

    // Level 3: 12 -> 8 vectors (4 compressors, carryTmp[15..18])
    generate
        for (i = 0; i < 4; i = i + 1) begin
            add3comp2 #(.BITS(106)) add3(.a(lv2add[3*i]),
                                         .b(lv2add[3*i+1]),
                                         .c(lv2add[3*i+2]),
                                         .carry(carryTmp[i+15][105:0]),
                                         .sum(lv3add[2*i+1]));
            assign lv3add[2*i] = {carryTmp[i+15][104:0], 1'b0};
        end
    endgenerate

    // Level 4: 8 -> 4 vectors (2 four-input compressors, carryTmp[19..20])
    generate
        for (i = 0; i < 2; i = i + 1) begin
            add4comp2 #(.BITS(106)) add4(.a(lv3add[4*i]),
                                         .b(lv3add[4*i+1]),
                                         .c(lv3add[4*i+2]),
                                         .d(lv3add[4*i+3]),
                                         .carry(carryTmp[i+19]),
                                         .sum(lv4add[2*i+1]));
            assign lv4add[2*i] = {carryTmp[i+19][104:0], 1'b0};
        end
    endgenerate

    // Final level: 4 -> 2 vectors; s is the sum vector, r the shifted carry
    add4comp2 #(.BITS(106)) add5(.a(lv4add[0]),
                                 .b(lv4add[1]),
                                 .c(lv4add[2]),
                                 .d(lv4add[3]),
                                 .carry(carryTmp[21]),
                                 .sum(s));
    assign r = {carryTmp[21][104:0], 1'b0};

    // assign r = 106'b0;
    // assign s = ({54'b1,xnorm} + (bypsel && bypplus1)) * {54'b1,ynorm};

endmodule
|
55
wally-pipelined/src/fpu/FMA/booth.sv
Normal file
55
wally-pipelined/src/fpu/FMA/booth.sv
Normal file
@ -0,0 +1,55 @@
|
||||
module booth(xExt, choose, add1, e, pp);
/////////////////////////////////////////////////////////////////////////////
//
// One booth cell. The 3-bit window "choose" selects which multiple of the
// multiplicand xExt is produced:
//
//   choose   multiple   pp                    add1   e
//   000        0        all zeros             00     0
//   001/010   +1        {xExt[53], xExt}      00     xExt[53]
//   011       +2        {xExt, 0}             00     xExt[53]
//   100       -2        {~xExt, 0}            10     ~xExt[53]
//   101/110   -1        {~xExt[53], ~xExt}    01     ~xExt[53]
//   111       -0        all ones              01     1
//
// Negative multiples are formed by bitwise inversion; add1 carries the
// correction that the caller adds to complete the negation.
//
// Changes relative to the original: the three separate always blocks are
// merged into a single always_comb with a default arm, the unused "temp"
// signal is removed, and the all-ones constant is written at its true
// width (the original 55'hfffffffffffffff literal has 60 bits of digits).
//
/////////////////////////////////////////////////////////////////////////////

    input  [53:0] xExt;    // multiplicand xExt
    input  [2:0]  choose;  // bits needed to choose which encoding
    output [1:0]  add1;    // do you add 1
    output        e;
    output [54:0] pp;      // the resultant encoding

    logic [54:0] pp;
    logic        e;
    logic [1:0]  add1;
    logic [53:0] negx;     // bitwise complement of the multiplicand

    assign negx = ~xExt;

    always_comb
        case (choose)
            3'b000 : begin pp = 55'b0;            e = 1'b0;     add1 = 2'b00; end //  0
            3'b001 : begin pp = {xExt[53], xExt}; e = xExt[53]; add1 = 2'b00; end //  1
            3'b010 : begin pp = {xExt[53], xExt}; e = xExt[53]; add1 = 2'b00; end //  1
            3'b011 : begin pp = {xExt, 1'b0};     e = xExt[53]; add1 = 2'b00; end //  2
            3'b100 : begin pp = {negx, 1'b0};     e = negx[53]; add1 = 2'b10; end // -2
            3'b101 : begin pp = {negx[53], negx}; e = negx[53]; add1 = 2'b01; end // -1
            3'b110 : begin pp = {negx[53], negx}; e = negx[53]; add1 = 2'b01; end // -1
            3'b111 : begin pp = {55{1'b1}};       e = 1'b1;     add1 = 2'b01; end // -0
            // only reachable when choose contains X/Z bits
            default: begin pp = 'x;               e = 1'bx;     add1 = 2'bxx; end
        endcase

endmodule
|
30
wally-pipelined/src/fpu/FMA/bypass.v
Normal file
30
wally-pipelined/src/fpu/FMA/bypass.v
Normal file
@ -0,0 +1,30 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Block Name: bypass.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block contains the bypass muxes which allow fast prerounded
|
||||
// bypass to the X and Z inputs of the FMAC
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module bypass(xrf[63:0], zrf[63:0], wbypass[63:0], bypsel[1:0],
              x[63:0], z[63:0]);
/////////////////////////////////////////////////////////////////////////////
//
// Two independent 2:1 muxes. bypsel[0] steers the X operand and bypsel[1]
// steers the Z operand; a set bit picks the bypassed result wbypass, a clear
// bit picks the register-file value.
//
/////////////////////////////////////////////////////////////////////////////

  input  [63:0] xrf;      // X from register file
  input  [63:0] zrf;      // Z from register file
  input  [63:0] wbypass;  // Prerounded result for bypass
  input  [1:0]  bypsel;   // Select bypass to X or Z
  output [63:0] x;        // Source X
  output [63:0] z;        // Source Z

  wire bypx;              // forward wbypass to X
  wire bypz;              // forward wbypass to Z

  assign bypx = bypsel[0];
  assign bypz = bypsel[1];

  assign x = bypx ? wbypass : xrf;
  assign z = bypz ? wbypass : zrf;

endmodule
|
90
wally-pipelined/src/fpu/FMA/compressors.sv
Normal file
90
wally-pipelined/src/fpu/FMA/compressors.sv
Normal file
@ -0,0 +1,90 @@
|
||||
module add3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
//
// BITS-wide 3:2 compressor: one independent sng3comp2 cell per bit position.
// carry[k] is the carry produced by column k; it is NOT shifted here, so the
// user is responsible for moving it up one position before reuse.
//
//look into different implementations of the compressors?
//
/////////////////////////////////////////////////////////////////////////////

    parameter BITS = 4;

    input  [BITS-1:0] a;
    input  [BITS-1:0] b;
    input  [BITS-1:0] c;
    output [BITS-1:0] carry;
    output [BITS-1:0] sum;

    genvar i;

    generate
        for (i = 0; i < BITS; i = i + 1) begin
            sng3comp2 add0(.a(a[i]),
                           .b(b[i]),
                           .c(c[i]),
                           .carry(carry[i]),
                           .sum(sum[i]));
        end
    endgenerate

endmodule
|
||||
|
||||
module add4comp2(a, b, c, d, carry, sum);
/////////////////////////////////////////////////////////////////////////////
//
// BITS-wide 4:2 compressor built from a row of sng4comp2 cells whose
// cin/cout pins are chained from bit 0 upwards (bit 0 receives cin = 0).
//
// Numerically:  a + b + c + d  ==  sum + 2 * carry
//
// carry[k] for k < BITS-1 is the cell carry of column k. The top column
// produces two outputs of equal weight 2^BITS (its cell carry and the last
// chain carry). They are combined with a half adder: the XOR is the bit of
// weight 2^BITS (carry[BITS-1]) and the AND is the bit of weight 2^(BITS+1)
// (carry[BITS]). The original code had these two assignments swapped, which
// broke the identity above whenever exactly one, or both, of the two top
// carries were set.
//
// Requires BITS >= 2.
//
/////////////////////////////////////////////////////////////////////////////

    parameter BITS = 4;

    input  [BITS-1:0] a;
    input  [BITS-1:0] b;
    input  [BITS-1:0] c;
    input  [BITS-1:0] d;
    output [BITS:0]   carry;
    output [BITS-1:0] sum;

    logic [BITS-1:0] cout;      // chain carry out of each column
    logic            carryTmp;  // cell carry of the top column
    genvar i;

    // bottom column: no incoming chain carry
    sng4comp2 add0(a[0], b[0], c[0], d[0], 1'b0, cout[0], carry[0], sum[0]);

    // middle columns
    generate
        for (i = 1; i < BITS-1; i = i + 1) begin
            sng4comp2 add1(a[i], b[i], c[i], d[i], cout[i-1], cout[i], carry[i], sum[i]);
        end
    endgenerate

    // top column: its cell carry is merged with the final chain carry below
    sng4comp2 add2(a[BITS-1], b[BITS-1], c[BITS-1], d[BITS-1], cout[BITS-2], cout[BITS-1], carryTmp, sum[BITS-1]);

    // half adder on the two weight-2^BITS carries
    assign carry[BITS-1] = carryTmp ^ cout[BITS-1];
    assign carry[BITS]   = carryTmp & cout[BITS-1];

endmodule
|
||||
|
||||
module sng3comp2(a, b, c, carry, sum);
/////////////////////////////////////////////////////////////////////////////
//
// Single-bit 3:2 compressor (full adder): {carry, sum} = a + b + c.
// The carry is produced with a mux: when a and b differ the carry follows
// c, otherwise it equals a.
//
//look into different implementations of the compressors?
//
/////////////////////////////////////////////////////////////////////////////

    input  a;
    input  b;
    input  c;
    output carry;
    output sum;

    logic differ;   // a and b are not equal

    assign differ = a ^ b;

    assign carry  = differ ? c : a;
    assign sum    = differ ^ c;

endmodule
|
||||
|
||||
module sng4comp2(a, b, c, d, cin, cout, carry, sum);
/////////////////////////////////////////////////////////////////////////////
//
// Single-bit 4:2 compressor made of two cascaded sng3comp2 cells.
// The first cell reduces a, b, c; its carry leaves as cout. The second cell
// reduces the first cell's sum together with d and cin, producing the final
// sum and carry. cout does not depend on cin.
//
//look into pass gate 4:2 counters?
//
/////////////////////////////////////////////////////////////////////////////

    input  a;
    input  b;
    input  c;
    input  d;
    input  cin;
    output cout;
    output carry;
    output sum;

    logic TmpSum;   // sum of the first stage (a, b, c)

    // first stage
    sng3comp2 add1(.a(a), .b(b), .c(c), .carry(cout), .sum(TmpSum));

    // second stage
    sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .carry(carry), .sum(sum));

endmodule
|
135
wally-pipelined/src/fpu/FMA/expgen.v
Normal file
135
wally-pipelined/src/fpu/FMA/expgen.v
Normal file
@ -0,0 +1,135 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen(x[62:52], y[62:52], z[62:52],
              earlyres[62:52], earlyressel, bypsel[1], byppostnorm,
              killprod, sumzero, postnormalize, normcnt, infinity,
              invalid, overflow, underflow, inf,
              nan, xnan, ynan, znan, zdenorm, specialsel,
              aligncnt, w[62:52], wbypass[62:52],
              prodof, sumof, sumuf, denorm0, ae[12:0]);
/////////////////////////////////////////////////////////////////////////////
//
// Exponent datapath. Produces, from the three operand exponents:
//   ae        x + y - 1023, kept as a 13-bit two's complement value
//   aligncnt  z - ae[10:0] (plus one for a post-normalized bypassed Z)
//   de        selected exponent (z when killprod, else ae) + 53 - normcnt,
//             optionally incremented when postnormalize is set
//   w         de, or a special-case exponent when specialsel is set
// together with the out-of-range indications prodof, sumof and sumuf.
//
/////////////////////////////////////////////////////////////////////////////

  input  [62:52] x;             // Exponent of multiplicand x
  input  [62:52] y;             // Exponent of multiplicand y
  input  [62:52] z;             // Exponent of addend z
  input  [62:52] earlyres;      // Result from other FPU block
  input          earlyressel;   // Select result from other block
  input  [1:1]   bypsel;        // Bypass X or Z
  input          byppostnorm;   // Postnormalize bypassed result
  input          killprod;      // Z >> product
  input          sumzero;       // sum exactly equals zero
  input          postnormalize; // postnormalize rounded result
  input  [8:0]   normcnt;       // normalization shift count
  input          infinity;      // generate infinity on overflow
  input          invalid;       // Result invalid
  input          overflow;      // Result overflowed
  input          underflow;     // Result underflowed
  input          inf;           // Some input is infinity
  input          nan;           // Some input is NaN
  input          xnan;          // X is NaN
  input          ynan;          // Y is NaN
  input          znan;          // Z is NaN
  input          zdenorm;       // Z is denorm
  input          specialsel;    // Select special result
  output [11:0]  aligncnt;      // shift count for alignment shifter
  output [62:52] w;             // Exponent of result
  output [62:52] wbypass;       // Prerounded exponent for bypass
  output         prodof;        // X*Y exponent out of bounds
  output         sumof;         // X*Y+Z exponent out of bounds
  output         sumuf;         // X*Y+Z exponent underflows
  output         denorm0;       // exponent = 0 for denorm
  output [12:0]  ae;            // exponent of multiply

  // Internal nodes

  wire [12:0] aetmp;        // declared by the original model, never driven or read
  wire [12:0] zminusae;     // z - ae[10:0], common part of both shift counts
  wire [12:0] aligncnt0;    // shift count, no bypass adjustment
  wire [12:0] aligncnt1;    // shift count, +1 for post-normalized bypass
  wire        bypadjust;    // bypassed Z needs the +1 shift count
  wire [12:0] be;           // exponent chosen as the base for normalization
  wire [12:0] normexp;      // be + 53 - normcnt
  wire [12:0] de0;          // normalized exponent
  wire [12:0] de1;          // normalized exponent + 1
  wire [12:0] de;           // normalized exponent after post-normalize select
  wire [10:0] infinityres;  // Infinity or max number
  wire [10:0] nanres;       // NaN propagated or generated
  wire [10:0] specialres;   // Exceptional case result

  // Compute exponent of multiply
  // Note that the exponent does not have to be incremented on a postrounding
  // normalization of X because the mantissa was already increased. Report
  // if exponent is out of bounds
  assign ae = x + y - 1023;

  assign prodof = (ae > 2046 && ~ae[12] && ~killprod);

  // Compute alignment shift count
  // Adjust for postrounding normalization of Z.
  // This should not increase the critical path because the time to
  // check if a round overflows is shorter than the actual round and
  // is masked by the bypass mux and two 10 bit adder delays.
  assign zminusae  = z - ae[10:0];
  assign aligncnt0 = zminusae;
  assign aligncnt1 = zminusae + 13'b1;
  assign bypadjust = bypsel[1] && byppostnorm;
  // only the low 12 bits of the 13-bit count leave the block
  assign aligncnt  = bypadjust ? aligncnt1[11:0] : aligncnt0[11:0];

  // Select exponent (usually from product except in case of huge addend)
  assign be = killprod ? z : ae;

  // Adjust exponent based on normalization
  // A compound adder takes care of the case of post-rounding normalization
  // requiring an extra increment
  assign normexp = be + 53 - normcnt;
  assign de0 = sumzero ? 13'b0 : normexp;
  assign de1 = sumzero ? 13'b0 : normexp + 13'b1;

  // If the exponent becomes exactly zero (denormalized)
  // signal such to adjust R bit before rounding
  assign denorm0 = (de0 == 0);

  // check for exponent out of bounds after add
  assign de    = postnormalize ? de1 : de0;
  assign sumof = de > 2046 && ~de[12];
  assign sumuf = (de == 0 || de[12]) && ~sumzero && ~zdenorm; //KEP ~zdenorm to prevent underflow flag

  // bypass occurs before rounding or taking early results
  assign wbypass = de0[10:0];

  // In a non-critical special mux, we combine the early result from other
  // FPU blocks with the results of exceptional conditions. Overflow
  // produces either infinity or the largest finite number, depending on the
  // rounding mode. NaNs are propagated or generated.
  assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

  assign nanres = xnan ? x :
                  ynan ? y :
                  znan ? z : 11'b11111111111;

  assign specialres = earlyressel ? earlyres    :
                      invalid     ? nanres      :
                      overflow    ? infinityres :
                      inf         ? 11'b11111111111 :
                      underflow   ? 11'b0 : 11'bx;

  // A mux selects the early result from other FPU blocks or the
  // normalized FMAC result. Special cases are also detected.
  // Only the low 11 bits of de fit into w.
  assign w = specialsel ? specialres[10:0] : de[10:0];

endmodule
|
||||
|
85
wally-pipelined/src/fpu/FMA/flag.v
Normal file
85
wally-pipelined/src/fpu/FMA/flag.v
Normal file
@ -0,0 +1,85 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
            psign, zsign, xzero, yzero, v[1:0],
            inf, nan, invalid, overflow, underflow, inexact);
/////////////////////////////////////////////////////////////////////////////
//
// Combinational flag logic. From the operand classification bits and the
// exponent range indications it derives the summary signals inf and nan and
// the four result flags invalid, overflow, underflow and inexact.
//
/////////////////////////////////////////////////////////////////////////////

  input        xnan;      // X is NaN
  input        ynan;      // Y is NaN
  input        znan;      // Z is NaN
  input        xinf;      // X is Inf
  input        yinf;      // Y is Inf
  input        zinf;      // Z is Inf
  input        prodof;    // X*Y overflows exponent
  input        sumof;     // X*Y + z overflows exponent
  input        sumuf;     // X*Y + z underflows exponent
  input        psign;     // Sign of product
  input        zsign;     // Sign of z
  input        xzero;     // x = 0
  input        yzero;     // y = 0
  input  [1:0] v;         // R and S bits of result
  output       inf;       // Some source is Inf
  output       nan;       // Some source is NaN
  output       invalid;   // Result is invalid
  output       overflow;  // Result overflowed
  output       underflow; // Result underflowed
  output       inexact;   // Result is not an exact number

  // Internal nodes

  wire prodinf;       // X*Y larger than max possible
  wire suminf;        // X*Y+Z larger than max possible
  wire infminusinf;   // infinite product and infinite addend of opposite sign
  wire zerotimesinf;  // one multiplicand is zero, the other infinite
  wire roundbits;     // R or S bit of the result is set

  // If any input is NaN, propagate the NaN
  assign nan = xnan || ynan || znan;

  // Same with infinity (inf - inf and 0 * inf don't propagate inf
  // but it's ok because illegal op takes higher precedence)
  assign inf = xinf || yinf || zinf;

  // Generate infinity checks
  assign prodinf = prodof && ~xnan && ~ynan;
  assign suminf  = sumof && ~xnan && ~ynan && ~znan;

  // Set invalid flag for following cases:
  //   1) Inf - Inf
  //   2) 0 * Inf
  //   3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
  assign infminusinf  = (xinf || yinf || prodinf) && zinf && (psign ^ zsign);
  assign zerotimesinf = (xzero && yinf) || (yzero && xinf);
  assign invalid      = infminusinf || zerotimesinf || nan;

  // Overflow: the sum exponent is out of bounds and no source is infinite
  assign overflow = suminf && ~inf;

  // Underflow: the sum exponent underflows and no infinity, product
  // overflow or NaN is involved
  assign underflow = (sumuf && ~inf && ~prodinf && ~nan);

  // Set the inexact flag for the following cases:
  //   1) Multiplication inexact
  //   2) Addition inexact
  // One of these cases occurred if the R or S bit is set
  assign roundbits = v[0] || v[1];
  assign inexact   = (roundbits || suminf) && ~(inf || nan);

endmodule
|
130
wally-pipelined/src/fpu/FMA/fmac.v
Normal file
130
wally-pipelined/src/fpu/FMA/fmac.v
Normal file
@ -0,0 +1,130 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: fmac.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This is the top level block of a floating-point multiply/accumulate
|
||||
// unit(FMAC). It instantiates the following sub-blocks:
|
||||
//
|
||||
// array Booth encoding, partial product generation, product summation
|
||||
// expgen Exponent summation, compare, and adjust
|
||||
// align Alignment shifter
|
||||
// add Carry-save adder for accumulate, carry propagate adder
|
||||
// lza Leading zero anticipator to control normalization shifter
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to X or Z inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes W=X*Y+Z, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the X or Z inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fmac(xrf, y, zrf, rn, rz, rp, rm,
            earlyres, earlyressel, bypsel, bypplus1, byppostnorm,
            w, wbypass, invalid, overflow, underflow, inexact);
/////////////////////////////////////////////////////////////////////////////
//
// Top level of the FMA. Purely structural: it wires together the operand
// bypass muxes, the fraction datapath (array, align, add, lop, normalize,
// round), the exponent datapath (expgen), special-case detection (special)
// and the sign / flag control blocks. All sub-block connections are
// positional.
//
// Every net is declared explicitly, including the single-bit control nets
// that would otherwise be created implicitly by the port connections.
//
/////////////////////////////////////////////////////////////////////////////

  input  [63:0] xrf;          // input X from reg file
  input  [63:0] y;            // input Y
  input  [63:0] zrf;          // input Z from reg file
  input         rn;           // Round to Nearest
  input         rz;           // Round toward zero
  input         rm;           // Round toward minus infinity
  input         rp;           // Round toward plus infinity
  input  [63:0] earlyres;     // Early result from other FP logic
  input         earlyressel;  // Select early result, not W
  input  [1:0]  bypsel;       // Select W bypass to X, or z
  input         bypplus1;     // Add one in bypass
  input         byppostnorm;  // postnormalize in bypass
  output [63:0] w;            // output W=X*Y+Z
  output [63:0] wbypass;      // prerounded output W=X*Y+Z for bypass
  output        invalid;      // Result is invalid
  output        overflow;     // Result overflowed
  output        underflow;    // Result underflowed
  output        inexact;      // Result is not an exact number

  // Datapath buses

  wire [63:0]  x;             // input X after bypass mux
  wire [63:0]  z;             // input Z after bypass mux
  wire [105:0] r;             // one result of partial product sum
  wire [105:0] s;             // other result of partial products
  wire [157:0] t;             // output of alignment shifter
  wire [157:0] sum;           // output of carry prop adder
  wire [53:0]  v;             // normalized sum, R, S bits
  wire [11:0]  aligncnt;      // shift count for alignment
  wire [8:0]   normcnt;       // shift count for normalizer
  wire [12:0]  ae;            // multiplier exponent

  // Adder / shifter control

  wire bs;                    // sticky bit of addend
  wire ps;                    // sticky bit of product
  wire killprod;              // Z >> product
  wire negsum;                // negate sum
  wire invz;                  // invert addend
  wire selsum1;               // select +1 mode of sum
  wire negsum0;               // sum +0 < 0
  wire negsum1;               // sum +1 < 0
  wire sumzero;               // sum = 0
  wire infinity;              // generate infinity on overflow
  wire postnormalize;         // connects round to expgen's postnormalize input
  wire specialsel;            // connects round to expgen's specialsel input
  wire denorm0;               // from expgen: exponent = 0 for denorm

  // Exponent range indications from expgen

  wire prodof;                // X*Y out of range
  wire sumof;                 // result out of range
  wire sumuf;                 // X*Y+Z exponent underflows

  // Operand classification (connected to the special block)

  wire xzero, yzero, zzero;
  wire xnan, ynan, znan;
  wire xinf, yinf, zinf;
  wire xdenorm, ydenorm, zdenorm;
  wire proddenorm;

  // Summary signals from flag / sign

  wire inf;                   // Some source is Inf
  wire nan;                   // Some source is NaN
  wire psign;                 // Sign of product

  // Operand bypass

  bypass bypass(xrf[63:0], zrf[63:0], wbypass[63:0], bypsel[1:0],
                x[63:0], z[63:0]);

  // Special case detection across datapath & exponent path

  special special(x[63:0], y[63:0], z[63:0], ae, xzero, yzero, zzero,
                  xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm,
                  xinf, yinf, zinf);

  // Fraction datapath

  array array(x[51:0], y[51:0], xdenorm, ydenorm, r[105:0], s[105:0],
              bypsel[0], bypplus1);

  align align(z[51:0], ae, aligncnt, xzero, yzero, zzero, zdenorm, proddenorm,
              t[157:0], bs, ps, killprod,
              bypsel[1], bypplus1, byppostnorm);

  add add(r[105:0], s[105:0], t[157:0], sum[157:0],
          negsum, invz, selsum1, killprod, negsum0, negsum1, proddenorm);

  lop lop(sum, normcnt, sumzero);

  normalize normalize(sum[157:0], normcnt, sumzero, bs, ps, denorm0, zdenorm,
                      v[53:0]);

  round round(v[53:0], earlyres[51:0], earlyressel, rz, rn, rp, rm, w[63],
              invalid, overflow, underflow, inf, nan, xnan, ynan, znan,
              x[51:0], y[51:0], z[51:0],
              w[51:0], postnormalize, infinity, specialsel);

  // Exponent datapath

  expgen expgen(x[62:52], y[62:52], z[62:52],
                earlyres[62:52], earlyressel, bypsel[1], byppostnorm,
                killprod, sumzero, postnormalize, normcnt,
                infinity, invalid, overflow, underflow,
                inf, nan, xnan, ynan, znan, zdenorm, specialsel,
                aligncnt, w[62:52], wbypass[62:52],
                prodof, sumof, sumuf, denorm0, ae);

  // Produce W for bypass: fraction is the normalized sum without its two
  // low bits, sign is the result sign; the exponent field comes from expgen

  assign wbypass[51:0] = v[53:2];
  assign wbypass[63]   = w[63];

  // Control logic

  sign sign(x[63], y[63], z[63], negsum0, negsum1, bs, ps,
            killprod, rm, sumzero, nan, invalid, xinf, yinf, inf,
            w[63], invz, negsum, selsum1, psign);

  flag flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
            psign, z[63], xzero, yzero, v[1:0],
            inf, nan, invalid, overflow, underflow, inexact);

endmodule
|
||||
|
41
wally-pipelined/src/fpu/FMA/lop.v
Normal file
41
wally-pipelined/src/fpu/FMA/lop.v
Normal file
@ -0,0 +1,41 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: lop.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements a Leading One Predictor used to determine
|
||||
// the normalization shift count.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module lop(sum, normcnt, sumzero);
/////////////////////////////////////////////////////////////////////////////

   input  [157:0] sum;       // adder result whose leading one is located
   output [8:0]   normcnt;   // left-shift amount that brings the leading one to bit 157
   output         sumzero;   // high when no bit of sum is set

   // Internal nodes

   reg [8:0]      normcnt;   // driven from the behavioral search below
   reg [8:0]      msb;       // bit position currently being examined

   // Behavioral stand-in for a leading-one predictor.  A real LOP would be
   // a parallel-prefix circuit working on the adder operands rather than
   // on the finished sum; this model simply walks the sum from bit 157
   // downward until it meets a bit that is not 0.
   //
   // The walk never looks below bit 1 as a stopping condition: if bits
   // [157:1] are all zero the count saturates at 157 whether or not
   // bit 0 is set.

   always @(sum)
      begin
         msb = 9'd157;
         while (msb > 0 && ~sum[msb]) msb = msb - 1;
         normcnt = 157 - msb;      // number of leading zeros skipped
      end

   // Zero detect: reduction NOR across the whole sum

   assign sumzero = ~|sum;

endmodule
|
||||
|
63
wally-pipelined/src/fpu/FMA/normalize.v
Normal file
63
wally-pipelined/src/fpu/FMA/normalize.v
Normal file
@ -0,0 +1,63 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: normalize.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the normalization shift. It also
|
||||
// generates the R and S bits for rounding. Finally, it
|
||||
// handles the special case of a zero sum.
|
||||
//
|
||||
// v[53:2] is the fraction component of the prerounded result.
|
||||
// It can be bypassed back to the X or Z inputs of the FMAC
|
||||
// for back-to-back operations.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module normalize(sum[157:0], normcnt, sumzero, bs, ps, denorm0, zdenorm, v[53:0]);
/////////////////////////////////////////////////////////////////////////////
   input [157:0] sum;        // sum
   input [8:0]   normcnt;    // normalization shift count
   input         sumzero;    // sum is zero
   input         bs;         // sticky bit for addend
   input         ps;         // sticky bit for product
   input         denorm0;    // exponent = -1023
   input         zdenorm;    // Input Z is denormalized
   output [53:0] v;          // normalized sum, R, S bits

   // Internal nodes

   reg  [53:0]   v;          // normalized sum, R, S bits
   wire [157:0]  sumshifted; // sum after the normalization left shift

   // Shift sum left by normcnt, filling the right with zeros.

   assign sumshifted = sum << normcnt;

   // When the sum is zero, normalization does not apply and only the
   // sticky bit must be computed.  Otherwise, the left-shifted sum
   // supplies the fraction bits v[53:3], the L bit v[2], and the R and
   // S bits (v[1] and v[0], respectively).
   //
   // When denorm0 is set and Z is not denormalized, the bit just above
   // each of the L and R positions is OR-ed into it.  This covers
   // denormalized results whose exponent was computed to be exactly
   // -1023 and is required for correct rounding up of multiplication
   // results.  The ~zdenorm qualifier (KEP) prevents a plus1 in round.v
   // when Z is denormalized.
   //
   // The sticky bit calculation is actually built into the shifter in a
   // real implementation; the wide OR below is only the model.
   //
   // zdenorm is read inside this block, so it must appear in the
   // sensitivity list; otherwise v would be stale in simulation whenever
   // zdenorm changed while the other inputs held their values.

   always @(sum or normcnt or sumzero or bs or ps or sumshifted or denorm0 or zdenorm)
      begin
         if (sumzero) begin                 // special case: exact zero sum
            v[53:1] = 0;
            v[0]    = ps || bs;
         end else begin                     // extract normalized bits
            v[53:3] = sumshifted[156:106];
            v[2]    = sumshifted[105] || sumshifted[106] && denorm0 && ~zdenorm;
            v[1]    = sumshifted[104] || sumshifted[105] && denorm0 && ~zdenorm;
            v[0]    = |(sumshifted[103:0]) || ps || bs;
         end
      end

endmodule
|
||||
|
106
wally-pipelined/src/fpu/FMA/round.v
Normal file
106
wally-pipelined/src/fpu/FMA/round.v
Normal file
@ -0,0 +1,106 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: round.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block is responsible for rounding the normalized result of
// the FMAC. Because prenormalized results may be bypassed back to
// the FMAC X and Z inputs, rounding does not appear in the critical
// path of most floating point code. This is good because rounding
// requires an entire 52 bit carry-propagate half-adder delay.
|
||||
//
|
||||
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
|
||||
// muxed in to form the actual result for register file writeback. This
|
||||
// saves a mux from the writeback path.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module round(v[53:0], earlyres[51:0], earlyressel, rz, rn, rp, rm, wsign,
             invalid, overflow, underflow, inf, nan, xnan, ynan, znan,
             x[51:0], y[51:0], z[51:0],
             w[51:0], postnormalize, infinity, specialsel);
/////////////////////////////////////////////////////////////////////////////

   input [53:0]  v;             // normalized sum; v[2] = L, v[1] = R, v[0] = S
   input [51:0]  earlyres;      // fraction supplied by another FPU block
   input         earlyressel;   // take earlyres instead of the FMAC result
   input         rz;            // rounding mode: toward zero (not read here)
   input         rn;            // rounding mode: to nearest
   input         rp;            // rounding mode: toward plus infinity
   input         rm;            // rounding mode: toward minus infinity
   input         wsign;         // sign of the result (1 = negative)
   input         invalid;       // invalid operation: result is a NaN
   input         overflow;      // result overflowed
   input         underflow;     // result underflowed
   input         inf;           // some input is infinity
   input         nan;           // some input is NaN
   input         xnan;          // X is NaN
   input         ynan;          // Y is NaN
   input         znan;          // Z is NaN
   input [51:0]  x;             // fraction of input X
   input [51:0]  y;             // fraction of input Y
   input [51:0]  z;             // fraction of input Z
   output [51:0] w;             // rounded (or special) result fraction
   output        postnormalize; // rounding carried out of the fraction
   output        infinity;      // overflow rounds to infinity (vs. max finite)
   output        specialsel;    // special result was selected

   // Internal nodes

   wire          inexactbits;   // R or S set: discarded bits are non-zero
   wire          awayfromzero;  // directed mode pointing away from zero
   wire          plus1;         // increment the fraction
   wire [52:0]   v1;            // fraction + 1, including carry out
   wire [51:0]   rounded;       // fraction after rounding
   wire [50:0]   nanpayload;    // low bits of the NaN being produced
   wire [51:0]   nanres;        // quiet NaN fraction
   wire [51:0]   infinityres;   // infinity or largest finite fraction
   wire [51:0]   specialres;    // output of the special-result mux

   // Rounding decision, taken from the rounding tables.
   //   nearest : increment when R is set and either S or L is set
   //   directed: increment when anything was discarded and the mode
   //             points away from zero for this sign

   assign inexactbits  = v[1] || v[0];
   assign awayfromzero = (rp && ~wsign) || (rm && wsign);
   assign plus1        = (rn && v[1] && (v[0] || v[2])) ||
                         (inexactbits && awayfromzero);

   // Incremented fraction and the fraction actually chosen

   assign v1      = {1'b0, v[53:2]} + 53'd1;
   assign rounded = plus1 ? v1[51:0] : v[53:2];

   // A post-rounding normalization is needed exactly when an all-ones
   // fraction is incremented.

   assign postnormalize = &v[53:2] && plus1;

   // On overflow the result is infinity (fraction 0) in the modes that
   // round away from zero for this sign, otherwise the largest finite
   // number (fraction all ones).

   assign infinity    = rn || awayfromzero;
   assign infinityres = {52{~infinity}};

   // NaN result: take the fraction of the first NaN operand in X, Y, Z
   // order with its top bit forced to 1 so the NaN is quiet (KEP 210112);
   // with no NaN operand the default quiet NaN is generated.

   assign nanpayload = xnan ? x[50:0] :
                       ynan ? y[50:0] :
                       znan ? z[50:0] : 51'b0;
   assign nanres     = {1'b1, nanpayload};

   // Special-result mux.  It is written as a priority chain here, but in
   // the circuit it is a plain mux off the critical path.  When none of
   // the listed conditions holds the value is left undefined.

   assign specialsel = |{earlyressel, overflow, underflow, invalid, nan, inf};

   assign specialres = earlyressel        ? earlyres    :
                       invalid            ? nanres      :
                       overflow           ? infinityres :
                       (inf || underflow) ? 52'b0       : 52'bx;

   // Final result select.  The case of a zero sum that rounds up is
   // handled in the exception unit, not here.

   assign w = specialsel ? specialres : rounded;

endmodule
|
||||
|
93
wally-pipelined/src/fpu/FMA/sign.v
Normal file
93
wally-pipelined/src/fpu/FMA/sign.v
Normal file
@ -0,0 +1,93 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: sign.v
|
||||
// Author: David Harris
|
||||
// Date: 12/1/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block manages the signs of the numbers.
|
||||
// 1 = negative
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module sign(xsign, ysign, zsign, negsum0, negsum1, bs, ps, killprod, rm,
            sumzero, nan, invalid, xinf, yinf, inf, wsign, invz, negsum, selsum1, psign);
/////////////////////////////////////////////////////////////////////////////

   input  xsign;      // Sign of X
   input  ysign;      // Sign of Y
   input  zsign;      // Sign of Z
   input  negsum0;    // Sum in +0 mode is negative
   input  negsum1;    // Sum in +1 mode is negative
   input  bs;         // sticky bit from addend
   input  ps;         // sticky bit from product
   input  killprod;   // Product forced to zero
   input  rm;         // Round toward minus infinity
   input  sumzero;    // Sum = 0
   input  nan;        // Some input is NaN (not read in this block)
   input  invalid;    // Result invalid
   input  xinf;       // X = Inf
   input  yinf;       // Y = Inf
   input  inf;        // Some input = Inf
   output wsign;      // Sign of W
   output invz;       // Invert addend into adder
   output negsum;     // Negate result of adder
   output selsum1;    // Select +1 mode from compound adder
   output psign;      // sign of product X * Y

   // Internal nodes

   wire   zerosign;   // sign if result = 0
   wire   infsign;    // sign if result = Inf
   reg    negsum;     // negate result of adder
   reg    selsum1;    // select +1 mode from compound adder

   // Compute sign of product

   assign psign = xsign ^ ysign;

   // Invert addend if sign of Z is different from sign of product

   assign invz = zsign ^ psign;

   // Select +1 mode for adder and compute if result must be negated.
   // This is done according to cases based on the sticky bits.

   always @(invz or negsum0 or negsum1 or bs or ps)
      begin
         if (~invz) begin            // both inputs have same sign
            negsum  = 0;
            selsum1 = 0;
         end else if (bs) begin      // sticky bit set on addend
            selsum1 = 0;
            negsum  = negsum0;
         end else if (ps) begin      // sticky bit set on product
            selsum1 = 1;
            negsum  = negsum1;
         end else begin              // both sticky bits clear
            // KEP 210113: originally ~negsum1, which added 1 to values
            // that were multiplied by 0.
            selsum1 = negsum1;
            negsum  = negsum1;
         end
      end

   // Compute sign of result.
   //
   // Zero sum: with an effective addition and the product killed, the
   // result keeps the sign of Z; otherwise an exact-zero result is +0,
   // except in round-toward-minus mode where it is -0.  IEEE 754-2008
   // section 6.3: "When the sum/difference of two operands with opposite
   // signs is exactly zero, the sign of that sum/difference shall be +0
   // in all rounding attributes EXCEPT roundTowardNegative.  Under that
   // attribute, the sign of an exact zero sum/difference shall be -0.
   // However, x+x = x-(-x) retains the same sign as x even when x is
   // zero."
   //
   // NaN / invalid: the sign of the NaN produced is forced to 0.  The
   // same section notes that the standard does not interpret the sign
   // of a NaN.
   //
   // Sign calculation is not in the critical path, so the cascaded
   // cases can be tolerated.

   assign zerosign = (~invz && killprod) ? zsign : rm;

   // Infinite result that is not invalid:
   //   - if X or Y is infinite the product is infinite and the result
   //     takes the product sign (a Z infinity of the opposite sign would
   //     be Inf - Inf, which is expected to arrive here as invalid);
   //   - otherwise the infinity can only have come from Z, so the result
   //     takes the sign of Z.
   // Using psign unconditionally gave the wrong sign for finite*finite
   // + Inf, and the earlier per-operand form gave the wrong sign for
   // Inf*finite.

   assign infsign = (xinf || yinf) ? psign : zsign;

   assign wsign = invalid ? 1'b0 :
                  inf     ? infsign :
                  sumzero ? zerosign : (psign ^ negsum);

endmodule
|
70
wally-pipelined/src/fpu/FMA/special.v
Normal file
70
wally-pipelined/src/fpu/FMA/special.v
Normal file
@ -0,0 +1,70 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: special.v
|
||||
// Author: David Harris
|
||||
// Date: 12/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements special case handling for unusual operands (e.g.
|
||||
// 0, NaN, denormalize, infinity). The block consists of zero/one detectors.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module special(x[63:0], y[63:0], z[63:0], ae, xzero, yzero, zzero,
               xnan, ynan, znan, xdenorm, ydenorm, zdenorm, proddenorm, xinf, yinf, zinf);
/////////////////////////////////////////////////////////////////////////////

   input [63:0]  x;           // operand X (bit 63 is not examined here)
   input [63:0]  y;           // operand Y (bit 63 is not examined here)
   input [63:0]  z;           // operand Z (bit 63 is not examined here)
   input [12:0]  ae;          // exponent of product
   output        xzero;       // X[62:0] is all zero
   output        yzero;       // Y[62:0] is all zero
   output        zzero;       // Z[62:0] is all zero
   output        xnan;        // X is NaN
   output        ynan;        // Y is NaN
   output        znan;        // Z is NaN
   output        xdenorm;     // X is denormalized
   output        ydenorm;     // Y is denormalized
   output        zdenorm;     // Z is denormalized
   output        proddenorm;  // product is denormalized
   output        xinf;        // X is infinity
   output        yinf;        // Y is infinity
   output        zinf;        // Z is infinity

   // Shared detectors on the exponent field [62:52] and the fraction
   // field [51:0] of each operand; every classification below is a
   // combination of these.

   wire          xexpones, yexpones, zexpones;     // exponent field all ones
   wire          xexpzero, yexpzero, zexpzero;     // exponent field all zeros
   wire          xfracnz,  yfracnz,  zfracnz;      // fraction field non-zero

   assign xexpones = &x[62:52];
   assign yexpones = &y[62:52];
   assign zexpones = &z[62:52];

   assign xexpzero = ~(|x[62:52]);
   assign yexpzero = ~(|y[62:52]);
   assign zexpzero = ~(|z[62:52]);

   assign xfracnz  = |x[51:0];
   assign yfracnz  = |y[51:0];
   assign zfracnz  = |z[51:0];

   // NaN: maximum exponent with a non-zero fraction

   assign xnan = xexpones && xfracnz;
   assign ynan = yexpones && yfracnz;
   assign znan = zexpones && zfracnz;

   // Infinity: maximum exponent with a zero fraction

   assign xinf = xexpones && ~xfracnz;
   assign yinf = yexpones && ~yfracnz;
   assign zinf = zexpones && ~zfracnz;

   // Denormalized: zero exponent with a non-zero fraction

   assign xdenorm = xexpzero && xfracnz;
   assign ydenorm = yexpzero && yfracnz;
   assign zdenorm = zexpzero && zfracnz;

   // Zero: exponent and fraction both zero, sign ignored.
   // KATHERINE (21/01/11): denormalized operands are no longer folded
   // into the zero flags, so that computing with a denormalized number
   // does not output zero.

   assign xzero = ~(|x[62:0]);
   assign yzero = ~(|y[62:0]);
   assign zzero = ~(|z[62:0]);

   // KEP: the product is flagged denormalized when every bit of ae is
   // set and neither multiplier operand is zero.

   assign proddenorm = (&ae) & ~(xzero | yzero);

endmodule
|
2824
wally-pipelined/src/fpu/FMA/tb.v
Normal file
2824
wally-pipelined/src/fpu/FMA/tb.v
Normal file
File diff suppressed because it is too large
Load Diff
1997
wally-pipelined/src/fpu/FMA/tbgen/StineVectors
Normal file
1997
wally-pipelined/src/fpu/FMA/tbgen/StineVectors
Normal file
File diff suppressed because it is too large
Load Diff
199
wally-pipelined/src/fpu/FMA/tbgen/ans
Normal file
199
wally-pipelined/src/fpu/FMA/tbgen/ans
Normal file
@ -0,0 +1,199 @@
|
||||
c22000007fffffff 24700000ffffffef a6a00001800007ed
|
||||
bfc00000000011fe 3fdfffffffffff03 bfb000000000117f
|
||||
a83100000007fffe 41e0000effffffff aa21000ff0080004
|
||||
0000000000000000 001ffffffffffffe 0000000000000000
|
||||
400327ca64d70ec7 3ca0000000000001 3cb327ca64d70ec9
|
||||
0000000000000000 43e207ffffffffff 0000000000000000
|
||||
0000000000000000 3fd0000000000000 0000000000000000
|
||||
0000000000000000 3fdfffffffffffff 0000000000000000
|
||||
0000000000000000 3fe0000000000000 0000000000000000
|
||||
c870200000010000 3fefffffffffffff c87020000000ffff
|
||||
c00aaa4fd557ef13 c3b8917384eb32d0 43d478efdc9216d8
|
||||
0000000000000000 7ffc000000000000 7ff8000000000000
|
||||
0000000000000000 c18aca47203438e2 0000000000000000
|
||||
0000000000000000 4000000000000001 0000000000000000
|
||||
47efff0008000000 b1dcb0523546117f b9dcaf6cb9e07bdb
|
||||
43f000ffffff7fff 22300000001fffdf 26300100001f81de
|
||||
402ff000001fffff 40759558e27de226 40b58a8e3622388e
|
||||
0000000000000000 40efdeffffffffff 0000000000000000
|
||||
0000000000000000 434fffffffffffff 0000000000000000
|
||||
7ffc000000000000 7fe0000000000000 7ff8000000000000
|
||||
b35e061abc769f3a c078000003fffffe 33e684941119bac2
|
||||
403a793cfb1e2471 bff0000100007fff c03a793ea2b2c7eb
|
||||
3d1ffffbfe000000 216898822a24af3f 1e98987f158ae1d8
|
||||
bfb00000001bffff 7ffc000000000000 7ff8000000000000
|
||||
37f0000000efffff c3d00007fffffeff bbd0000800efff75
|
||||
0000000000000000 ffefff8000080000 0000000000000000
|
||||
3fb00200000000ff c0000000011fffff bfc00200012024fd
|
||||
41c0000007ffffff 49103fffefffffff 4ae03ffff81ffff6
|
||||
407effbfffffffff 3e00000040001fff 3e8effc07bff3dfd
|
||||
c1f00013fffffffe 7ffc000000000000 7ff8000000000000
|
||||
c3f00004000001ff c3d00bfffffffffe 47d00c04030001ff
|
||||
403b5ab30b28be12 bfdfffffffffffff c02b5ab30b28be11
|
||||
0000000000000000 c1cfffffff87ffff 0000000000000000
|
||||
0000000000000000 bfe0000000000001 0000000000000000
|
||||
801ffc000007ffff bfeffffffffffffe 001ffc000007fffe
|
||||
0000000000000000 ffe0000005fffffe 0000000000000000
|
||||
0000000000000000 bfffffffffffffff 0000000000000000
|
||||
0000000000000000 c000000000000000 0000000000000000
|
||||
c3d09308769f3f51 c00fffffffffffff 43f09308769f3f51
|
||||
0000000000000000 402ffffdfefffffe 0000000000000000
|
||||
0000000000000000 c010000000000001 0000000000000000
|
||||
c01fffffffc00fff c01ffffffffffffe 404fffffffc00ffe
|
||||
c025e14360f49046 412fff0000000003 c165e09456d988a3
|
||||
0000000000000000 43ee59a2f1155c8b 0000000000000000
|
||||
3fe0000000008fff 802ffffff7fffff6 801ffffff8011ff3
|
||||
0000000000000000 ffefffffffffffff 0000000000000000
|
||||
40401007fffffffe fff0000000000000 80401007fffffffe
|
||||
0000000000000000 c0045abb4860cbf3 0000000000000000
|
||||
0000000000000000 7ffc000000000000 7ff8000000000000
|
||||
bffffffec0000000 c000000000003eff 400ffffec0007dfe
|
||||
48000000004001ff 41f331de979ac49e 4a0331de97e78e7e
|
||||
3d0fffffbff7ffff 7ffc000000000000 7ff8000000000000
|
||||
43d3ffffff000000 3caffffffffffffe 4093fffffeffffff
|
||||
7ffc000000000000 43dfff8004000000 7ff8000000000000
|
||||
bcaffe0000000008 3fd00008000000ff bc8ffe0fff000205
|
||||
404ffbfffffffffc c34ffff8003fffff c3affbf8013ff7fb
|
||||
43e0000000000082 3db000003ffffeff 41a000003fffff82
|
||||
c1d004000ffffffe 4000000000000000 c1e004000ffffffe
|
||||
c00fffffc000007e c02ffffdfffffbff 404ffffdc000007e
|
||||
409dfffbffffffff 4010000000000001 40bdfffc00000001
|
||||
c120000003ffffe0 c06000fffbffffff 4190010000003fde
|
||||
3fd1f7ffffffffff c01000001dffffff bff1f80021b0fffd
|
||||
2e0fefdfffffffff 4030000020000040 2e4fefe03fdfc07f
|
||||
43c0000803ffffff 3fcfffffffffffff 43a0000803ffffff
|
||||
c0afffffbffffdfe 3fc07ffdffffffff c0807ffddf0002f5
|
||||
c0fffffffeffffee 55139bb9349e058c d6239bb9340127b7
|
||||
41ffdbaf18ce06bd 8010000000000000 821fdbaf18ce06bd
|
||||
c0e1000000080000 801ffffffffffffe 011100000007ffff
|
||||
3fbffffff0000007 c807dfffffffffff c7d7dffff4100004
|
||||
c357b53537b96da5 bfd0000000000000 4337b53537b96da5
|
||||
401fffffffffffff ffebff8000000000 801bff7fffffffff
|
||||
c7eff77bf2b59c3c bfe0000000000001 47dff77bf2b59c3e
|
||||
380c3f72cc3dec98 c3fffffffbffffff bc1c3f72c8b5fe3d
|
||||
b8e0000003fbffff c503f4d44f4bf888 3df3f4d454443066
|
||||
3f3ffffc001fffff c000000000000001 bf4ffffc00200000
|
||||
c340002000004000 c0db3367e0423019 442b339e47125d6b
|
||||
4f60000801ffffff 41c07fe000000000 51307fe841fffbff
|
||||
c1ffffffbfefffff c340000000000001 454fffffbff00001
|
||||
404fff7fffffff7f 48ab7e2aad4ec686 490b7dbcb4a410dd
|
||||
7ffc000000000000 ffefffffffffffff 7ff8000000000000
|
||||
41e189ea1a6fff97 7ffc000000000000 7ff8000000000000
|
||||
3ff0ee9046c9330f 8479e1e79766e02b 847b63d14ff91acb
|
||||
d2f805130a8c11df 43effffdfdfffffe d6f8051188ba9004
|
||||
4f1fffbfffe00000 bcd02000000007ff cc001fdfbfefe7fe
|
||||
be70000077ffffff c1efffffffffffff 4070000077ffffff
|
||||
41e1ffffbffffffe 3caffffffffffffe 3ea1ffffbffffffd
|
||||
3bbd976272fb1d2a c06ffff80007fffe bc3d975b0d29e641
|
||||
434fff01ffffffff 403dfeffffffffff 439dfe11e7efffff
|
||||
be6fff7fffffffff 3feffffffffffffe be6fff7ffffffffd
|
||||
41d007ff80000000 41f0fffffffc0000 43d1087f77fbfe01
|
||||
ffeef7a206029708 bdcfa4109a3a5b22 7dce9eaa2542875b
|
||||
3b6ffffffeffffc0 3c7ffffe003ffffe 37fffffdff3fffce
|
||||
c1d1ffffffbfffff bfcffffefffff800 41b1ffff6fbffb82
|
||||
2030000000000090 c05e2e90015c47a1 a09e2e90015c48b0
|
||||
bbf000000007efff 001fe0000007fffe fc1fe0000017d01c
|
||||
41cae866712069f4 c02fffffffffffff c20ae866712069f3
|
||||
bfce1e32ccf56348 3ca1f66d4c8eeef3 bc80e7fa025544da
|
||||
ffedfffff0000000 ffeffff000000800 3fedfff0f0000f80
|
||||
37effffc3ffffffe bca0fffffffffffd b4a0fffe01fffffb
|
||||
bc950a021bf9dee1 3db0001fffdffffe ba550a2c2fd402cd
|
||||
fd4fffffdfffffef 41cffffdffffffef ff2ffffde00001de
|
||||
bfc00000004007ff bcafffffffffffff 3c800000004007ff
|
||||
c009130b80fe8274 b811571307061a38 382b2cb1993b60f3
|
||||
c0600000ffffffdf 7feda1b8c591f9c6 805da1ba9fad85e2
|
||||
c1e4af3f8d45e031 3ca0020002000000 be94b1d577cd70de
|
||||
3800008100000000 b810000020000080 b020008120010280
|
||||
372ff00000003fff 7fe000fdfffffffe 771ff1fb02003fff
|
||||
47d00021fffffffe c00fffffffffffff c7f00021fffffffd
|
||||
bfbc9ea0c2b4884b 43f4a552574073d5 c3c277000b21a4e7
|
||||
bf1fe0000000ffff c01ffffffffffffe 3f4fe0000000fffe
|
||||
41ffffffff7ffffb 0027ffffffffeffe 0237ffffff9feffb
|
||||
c7e040000fffffff ffe0000000000000 07d040000fffffff
|
||||
7ffc000000000000 3fe0000ffffff7ff 7ff8000000000000
|
||||
c1effc1fffffffff 7ffc000000000000 7ff8000000000000
|
||||
c0d000000001ffbf c03ba46e644e4e9c 411ba46e6451c2ba
|
||||
c4500000005fffff c03a20ab4de47fc9 449a20ab4e8143cc
|
||||
400e00000000007e 001fffffffffffff 003e00000000007e
|
||||
45a01fffff7fffff c3c0020200000000 c9702206037fefee
|
||||
3e8ff800000000ff 3caffffffffffffe 3b4ff800000000fe
|
||||
be004000000007fe 3fdffff7ff7fffff bdf03ffbefbf07fd
|
||||
b11000007ffffe00 3fe0000000000000 b10000007ffffe00
|
||||
b80cef50bd17db40 c05fffc00000000e 387cef16de76611d
|
||||
3d4000ffffffffff 3d47f68d8eb6b9a4 3a97f80cf78fa50f
|
||||
ffe3fffffffffffb c03dc3321aaa5380 003299ff50aa742c
|
||||
3ca3fffffffffeff bf02ffafb4e9241d bbb7bf9ba2236bf3
|
||||
53598c812c3c39dd 3f20000100fffffe 52898c82c69d14b1
|
||||
c3dffffff8000001 3fe0020000003ffe c3d001fffbffbffe
|
||||
7ba00800003fffff 3ff9a9a129c791b3 7ba9b675fac31bff
|
||||
c3d0000fffffffef 7fe0000000000001 83c0000ffffffff0
|
||||
c34f80001fffffff b7fffffe0007ffff 3b5f7ffe2807ddff
|
||||
0010000000001ff8 4800020000010000 0820020000011ffc
|
||||
2c4c0000003fffff 230ffffc00400000 0f6bfffc8077fff8
|
||||
381fffffffbff7fe 8010000000000000 f83fffffffbff7fe
|
||||
802d3018ea8c241d c007fdffffffffff 0045e23fae5a7253
|
||||
43e047fffffffffe 4000003ffdfffffe 43f048411df6fffc
|
||||
c000005fffffffff 403ffffffff00002 c050005ffff7ffd0
|
||||
3fc8b60e46a80f6d bfdffffffffffffe bfb8b60e46a80f6b
|
||||
bd5fdffdffffffff 5644b72ace1bbb6b d3b4a27257daf2cd
|
||||
b80010001fffffff 40e01ffffff7fffe b8f030202037f7fc
|
||||
407000003ffbfffe 38042862fe8e3368 388428634f2ab547
|
||||
bf8ffbfff7ffffff c00fffffffffffff 3faffbfff7ffffff
|
||||
bcafc000003fffff c010000000000001 3ccfc00000400001
|
||||
47eddf042473ef08 b7e00000fe000000 bfdddf05fea850ca
|
||||
3fbfffff7fffffef c340ffffffffffbf c310ffffbbffffb5
|
||||
c02f8000000007ff ffe0000000000001 001f800000000801
|
||||
002f37ebf6c8eaec c08be464f4c81c69 80cb36000706e168
|
||||
c00e800000000000 7ffc000000000000 7ff8000000000000
|
||||
0010000000000000 0000000000000000 0000000000000000
|
||||
bfffc00000000003 391001ffffffffff b91fc3f800000001
|
||||
c1db54446247aa52 bfcc001fffffffff 41b7e9d72a43174f
|
||||
0010000000000000 c0392c59c8e48f37 80592c59c8e48f37
|
||||
0010000000000000 c0000800000001ff 80200800000001ff
|
||||
0010000000000000 c1d0000004000fff 81f0000004000fff
|
||||
4030040000200000 0017055f48beeff5 00570b20a0bf2a70
|
||||
bc7000000000ffee c1e0001100000000 3e6000110000fff0
|
||||
c040000000007fff c3b2a6c91c557f56 4402a6c91c56148c
|
||||
41ffffffff003fff c3b0000007ffffee c5c0000007801fed
|
||||
21900001dfffffff bf20000017fffffe a0c00001f80002cc
|
||||
0029954d0f0df5b3 41e00000000003ff 0219954d0f0dfc17
|
||||
b810000020000001 47ffdfffffffff80 c01fe0003fbfff81
|
||||
0010000000000000 ffeffff800007fff c00ffff800007fff
|
||||
0010000000000000 4010000000000000 0030000000000000
|
||||
bf700000000100ff 401fffffffffffff bfa00000000100fe
|
||||
37feffffffffffff 47ef8000000fffff 3ffe8400000f7fff
|
||||
b80f800001fffffe 44e00000ffff7fff bcff8001f9ff041c
|
||||
0010000000000000 434ffffffffffffe 036ffffffffffffe
|
||||
41ffffdfffff8000 7fe0000000000001 01efffdfffff8002
|
||||
b80a16ad02c87cd3 380fffffffffe7fe b02a16ad02c86940
|
||||
47f0fffffffffffb 7ffc000000000000 7ff8000000000000
|
||||
0010000000000000 41ffffffffbfff7f 021fffffffbfff7f
|
||||
0010000000000000 8000000000000000 0000000000000000
|
||||
c3d00001000001ff b7f60cb3edb38762 3bd60cb54e7ec8fe
|
||||
0010000000000000 8010000000000001 c030000000000001
|
||||
43c0007fffdfffff 801ffffffffffffe 83f0007fffdffffd
|
||||
c7efffffdffffbff bca0000000000001 449fffffdffffc01
|
||||
0010000000000000 c11ff00000000003 813ff00000000003
|
||||
0010000000000000 bfd0000000000000 ffefffffffffffff
|
||||
c0ffffffffeffffe bfdfffffffffffff 40efffffffeffffe
|
||||
6f7000000001fdff 1510010000000fff 4490010000020e1e
|
||||
37f002000000000f b1effcfffffffffe a9f0007fd000000d
|
||||
cc3050bc013d7cd7 bff0000000000000 4c3050bc013d7cd7
|
||||
0010000000000000 87fff0000000fffe c81ff0000000fffe
|
||||
0010000000000000 bffffffffffffffe 801ffffffffffffe
|
||||
43effbfffffff7ff 7fefffffff801ffe 03effbffff8027fa
|
||||
c015834380f2b995 3f9fff0000000400 bfc5829766d6b4af
|
||||
0010000000000000 41dfffffc0001000 01ffffffc0001000
|
||||
0010000000000000 c01fffffffffffff 803fffffffffffff
|
||||
41e010000000001f c5b04000000fffff c7a050400010101e
|
||||
3b40018000000000 3ea0400000000100 39f0418600000101
|
||||
0010000000000000 4cdffeffff7fffff 0cfffeffff7fffff
|
||||
16dff0001ffffffe 3fb500ae0796659d 16a4f62dc5934871
|
||||
b7e003ffffffff7f deafffffeffffffd 56a003fff7fdff7e
|
||||
406000001fffbfff 3f20020000080000 3f900200200bbff8
|
||||
0010000000000000 7ffc000000000000 7ff8000000000000
|
||||
439fbffffffbffff bf8454fd38ef0ba0 c3342c533e7aa2e8
|
||||
c1c000000200007e bf000001ffffffbf 40d000020200007e
|
||||
480000000008fffe 001637e790e69de2 082637e790f31d52
|
||||
bffffffc000003fe 3ca0000000000001 bcaffffc000003ff
|
||||
6b4848a9a8c0dcd5 480ffffffffbdfff 736848a9a8bdbb77
|
199
wally-pipelined/src/fpu/FMA/tbgen/output
Normal file
199
wally-pipelined/src/fpu/FMA/tbgen/output
Normal file
@ -0,0 +1,199 @@
|
||||
c22000007fffffff 24700000ffffffef a6a00001800007ee
|
||||
bfc00000000011fe 3fdfffffffffff03 bfb000000000117f
|
||||
a83100000007fffe 41e0000effffffff aa21000ff0080004
|
||||
0000000000000000 001ffffffffffffe 0000000000000000
|
||||
400327ca64d70ec7 3ca0000000000001 3cb327ca64d70ec8
|
||||
0000000000000000 43e207ffffffffff 0000000000000000
|
||||
0000000000000000 3fd0000000000000 0000000000000000
|
||||
0000000000000000 3fdfffffffffffff 0000000000000000
|
||||
0000000000000000 3fe0000000000000 0000000000000000
|
||||
c870200000010000 3fefffffffffffff c87020000000ffff
|
||||
c00aaa4fd557ef13 c3b8917384eb32d0 43d478efdc9216d7
|
||||
0000000000000000 7ffc000000000000 7ffc000000000000
|
||||
0000000000000000 c18aca47203438e2 8000000000000000
|
||||
0000000000000000 4000000000000001 0000000000000000
|
||||
47efff0008000000 b1dcb0523546117f b9dcaf6cb9e07bdc
|
||||
43f000ffffff7fff 22300000001fffdf 26300100001f81de
|
||||
402ff000001fffff 40759558e27de226 40b58a8e3622388d
|
||||
0000000000000000 40efdeffffffffff 0000000000000000
|
||||
0000000000000000 434fffffffffffff 0000000000000000
|
||||
7ffc000000000000 7fe0000000000000 7ffc000000000000
|
||||
b35e061abc769f3a c078000003fffffe 33e684941119bac1
|
||||
403a793cfb1e2471 bff0000100007fff c03a793ea2b2c7eb
|
||||
3d1ffffbfe000000 216898822a24af3f 1e98987f158ae1d8
|
||||
bfb00000001bffff 7ffc000000000000 7ffc000000000000
|
||||
37f0000000efffff c3d00007fffffeff bbd0000800efff76
|
||||
0000000000000000 ffefff8000080000 8000000000000000
|
||||
3fb00200000000ff c0000000011fffff bfc00200012024fe
|
||||
41c0000007ffffff 49103fffefffffff 4ae03ffff81ffff6
|
||||
407effbfffffffff 3e00000040001fff 3e8effc07bff3dfd
|
||||
c1f00013fffffffe 7ffc000000000000 7ffc000000000000
|
||||
c3f00004000001ff c3d00bfffffffffe 47d00c04030001fe
|
||||
403b5ab30b28be12 bfdfffffffffffff c02b5ab30b28be11
|
||||
0000000000000000 c1cfffffff87ffff 8000000000000000
|
||||
0000000000000000 bfe0000000000001 8000000000000000
|
||||
801ffc000007ffff bfeffffffffffffe 001ffc000007fffd
|
||||
0000000000000000 ffe0000005fffffe 8000000000000000
|
||||
0000000000000000 bfffffffffffffff 8000000000000000
|
||||
0000000000000000 c000000000000000 8000000000000000
|
||||
c3d09308769f3f51 c00fffffffffffff 43f09308769f3f50
|
||||
0000000000000000 402ffffdfefffffe 0000000000000000
|
||||
0000000000000000 c010000000000001 8000000000000000
|
||||
c01fffffffc00fff c01ffffffffffffe 404fffffffc00ffd
|
||||
c025e14360f49046 412fff0000000003 c165e09456d988a4
|
||||
0000000000000000 43ee59a2f1155c8b 0000000000000000
|
||||
3fe0000000008fff 802ffffff7fffff6 801ffffff8011ff4
|
||||
0000000000000000 ffefffffffffffff 8000000000000000
|
||||
40401007fffffffe fff0000000000000 fff0000000000000
|
||||
0000000000000000 c0045abb4860cbf3 8000000000000000
|
||||
0000000000000000 7ffc000000000000 7ffc000000000000
|
||||
bffffffec0000000 c000000000003eff 400ffffec0007dfe
|
||||
48000000004001ff 41f331de979ac49e 4a0331de97e78e7d
|
||||
3d0fffffbff7ffff 7ffc000000000000 7ffc000000000000
|
||||
43d3ffffff000000 3caffffffffffffe 4093fffffeffffff
|
||||
7ffc000000000000 43dfff8004000000 7ffc000000000000
|
||||
bcaffe0000000008 3fd00008000000ff bc8ffe0fff000206
|
||||
404ffbfffffffffc c34ffff8003fffff c3affbf8013ff7fb
|
||||
43e0000000000082 3db000003ffffeff 41a000003fffff81
|
||||
c1d004000ffffffe 4000000000000000 c1e004000ffffffe
|
||||
c00fffffc000007e c02ffffdfffffbff 404ffffdc000007d
|
||||
409dfffbffffffff 4010000000000001 40bdfffc00000001
|
||||
c120000003ffffe0 c06000fffbffffff 4190010000003fde
|
||||
3fd1f7ffffffffff c01000001dffffff bff1f80021b0fffe
|
||||
2e0fefdfffffffff 4030000020000040 2e4fefe03fdfc07f
|
||||
43c0000803ffffff 3fcfffffffffffff 43a0000803fffffe
|
||||
c0afffffbffffdfe 3fc07ffdffffffff c0807ffddf0002f6
|
||||
c0fffffffeffffee 55139bb9349e058c d6239bb9340127b7
|
||||
41ffdbaf18ce06bd 8010000000000000 821fdbaf18ce06bd
|
||||
c0e1000000080000 801ffffffffffffe 011100000007ffff
|
||||
3fbffffff0000007 c807dfffffffffff c7d7dffff4100004
|
||||
c357b53537b96da5 bfd0000000000000 4337b53537b96da5
|
||||
401fffffffffffff ffebff8000000000 fff0000000000000
|
||||
c7eff77bf2b59c3c bfe0000000000001 47dff77bf2b59c3e
|
||||
380c3f72cc3dec98 c3fffffffbffffff bc1c3f72c8b5fe3e
|
||||
b8e0000003fbffff c503f4d44f4bf888 3df3f4d454443065
|
||||
3f3ffffc001fffff c000000000000001 bf4ffffc00200001
|
||||
c340002000004000 c0db3367e0423019 442b339e47125d6b
|
||||
4f60000801ffffff 41c07fe000000000 51307fe841fffbff
|
||||
c1ffffffbfefffff c340000000000001 454fffffbff00001
|
||||
404fff7fffffff7f 48ab7e2aad4ec686 490b7dbcb4a410dc
|
||||
7ffc000000000000 ffefffffffffffff 7ffc000000000000
|
||||
41e189ea1a6fff97 7ffc000000000000 7ffc000000000000
|
||||
3ff0ee9046c9330f 8479e1e79766e02b 847b63d14ff91acb
|
||||
d2f805130a8c11df 43effffdfdfffffe d6f8051188ba9004
|
||||
4f1fffbfffe00000 bcd02000000007ff cc001fdfbfefe7ff
|
||||
be70000077ffffff c1efffffffffffff 4070000077fffffe
|
||||
41e1ffffbffffffe 3caffffffffffffe 3ea1ffffbffffffd
|
||||
3bbd976272fb1d2a c06ffff80007fffe bc3d975b0d29e642
|
||||
434fff01ffffffff 403dfeffffffffff 439dfe11e7effffe
|
||||
be6fff7fffffffff 3feffffffffffffe be6fff7ffffffffd
|
||||
41d007ff80000000 41f0fffffffc0000 43d1087f77fbfe00
|
||||
ffeef7a206029708 bdcfa4109a3a5b22 7dce9eaa2542875b
|
||||
3b6ffffffeffffc0 3c7ffffe003ffffe 37fffffdff3fffce
|
||||
c1d1ffffffbfffff bfcffffefffff800 41b1ffff6fbffb81
|
||||
2030000000000090 c05e2e90015c47a1 a09e2e90015c48b1
|
||||
bbf000000007efff 001fe0000007fffe 8000000000000000
|
||||
41cae866712069f4 c02fffffffffffff c20ae866712069f3
|
||||
bfce1e32ccf56348 3ca1f66d4c8eeef3 bc80e7fa025544db
|
||||
ffedfffff0000000 ffeffff000000800 7ff0000000000000
|
||||
37effffc3ffffffe bca0fffffffffffd b4a0fffe01fffffc
|
||||
bc950a021bf9dee1 3db0001fffdffffe ba550a2c2fd402ce
|
||||
fd4fffffdfffffef 41cffffdffffffef ff2ffffde00001de
|
||||
bfc00000004007ff bcafffffffffffff 3c800000004007fe
|
||||
c009130b80fe8274 b811571307061a38 382b2cb1993b60f2
|
||||
c0600000ffffffdf 7feda1b8c591f9c6 fff0000000000000
|
||||
c1e4af3f8d45e031 3ca0020002000000 be94b1d577cd70df
|
||||
3800008100000000 b810000020000080 b020008120010280
|
||||
372ff00000003fff 7fe000fdfffffffe 771ff1fb02003fff
|
||||
47d00021fffffffe c00fffffffffffff c7f00021fffffffd
|
||||
bfbc9ea0c2b4884b 43f4a552574073d5 c3c277000b21a4e8
|
||||
bf1fe0000000ffff c01ffffffffffffe 3f4fe0000000fffd
|
||||
41ffffffff7ffffb 0027ffffffffeffe 0237ffffff9feffa
|
||||
c7e040000fffffff ffe0000000000000 7ff0000000000000
|
||||
7ffc000000000000 3fe0000ffffff7ff 7ffc000000000000
|
||||
c1effc1fffffffff 7ffc000000000000 7ffc000000000000
|
||||
c0d000000001ffbf c03ba46e644e4e9c 411ba46e6451c2ba
|
||||
c4500000005fffff c03a20ab4de47fc9 449a20ab4e8143cb
|
||||
400e00000000007e 001fffffffffffff 003e00000000007d
|
||||
45a01fffff7fffff c3c0020200000000 c9702206037fefef
|
||||
3e8ff800000000ff 3caffffffffffffe 3b4ff800000000fd
|
||||
be004000000007fe 3fdffff7ff7fffff bdf03ffbefbf07fd
|
||||
b11000007ffffe00 3fe0000000000000 b10000007ffffe00
|
||||
b80cef50bd17db40 c05fffc00000000e 387cef16de76611d
|
||||
3d4000ffffffffff 3d47f68d8eb6b9a4 3a97f80cf78fa50e
|
||||
ffe3fffffffffffb c03dc3321aaa5380 7ff0000000000000
|
||||
3ca3fffffffffeff bf02ffafb4e9241d bbb7bf9ba2236bf3
|
||||
53598c812c3c39dd 3f20000100fffffe 52898c82c69d14b0
|
||||
c3dffffff8000001 3fe0020000003ffe c3d001fffbffbfff
|
||||
7ba00800003fffff 3ff9a9a129c791b3 7ba9b675fac31bff
|
||||
c3d0000fffffffef 7fe0000000000001 fff0000000000000
|
||||
c34f80001fffffff b7fffffe0007ffff 3b5f7ffe2807ddfe
|
||||
0010000000001ff8 4800020000010000 0820020000011ffc
|
||||
2c4c0000003fffff 230ffffc00400000 0f6bfffc8077fff7
|
||||
381fffffffbff7fe 8010000000000000 8000000000000000
|
||||
802d3018ea8c241d c007fdffffffffff 0045e23fae5a7253
|
||||
43e047fffffffffe 4000003ffdfffffe 43f048411df6fffc
|
||||
c000005fffffffff 403ffffffff00002 c050005ffff7ffd0
|
||||
3fc8b60e46a80f6d bfdffffffffffffe bfb8b60e46a80f6b
|
||||
bd5fdffdffffffff 5644b72ace1bbb6b d3b4a27257daf2cd
|
||||
b80010001fffffff 40e01ffffff7fffe b8f030202037f7fd
|
||||
407000003ffbfffe 38042862fe8e3368 388428634f2ab547
|
||||
bf8ffbfff7ffffff c00fffffffffffff 3faffbfff7fffffe
|
||||
bcafc000003fffff c010000000000001 3ccfc00000400001
|
||||
47eddf042473ef08 b7e00000fe000000 bfdddf05fea850cb
|
||||
3fbfffff7fffffef c340ffffffffffbf c310ffffbbffffb6
|
||||
c02f8000000007ff ffe0000000000001 7ff0000000000000
|
||||
002f37ebf6c8eaec c08be464f4c81c69 80cb36000706e169
|
||||
c00e800000000000 7ffc000000000000 7ffc000000000000
|
||||
0010000000000000 0000000000000000 0000000000000000
|
||||
bfffc00000000003 391001ffffffffff b91fc3f800000001
|
||||
c1db54446247aa52 bfcc001fffffffff 41b7e9d72a43174f
|
||||
0010000000000000 c0392c59c8e48f37 80592c59c8e48f37
|
||||
0010000000000000 c0000800000001ff 80200800000001ff
|
||||
0010000000000000 c1d0000004000fff 81f0000004000fff
|
||||
4030040000200000 0017055f48beeff5 00570b20a0bf2a70
|
||||
bc7000000000ffee c1e0001100000000 3e6000110000ffef
|
||||
c040000000007fff c3b2a6c91c557f56 4402a6c91c56148b
|
||||
41ffffffff003fff c3b0000007ffffee c5c0000007801fed
|
||||
21900001dfffffff bf20000017fffffe a0c00001f80002cd
|
||||
0029954d0f0df5b3 41e00000000003ff 0219954d0f0dfc17
|
||||
b810000020000001 47ffdfffffffff80 c01fe0003fbfff82
|
||||
0010000000000000 ffeffff800007fff c00ffff800007fff
|
||||
0010000000000000 4010000000000000 0030000000000000
|
||||
bf700000000100ff 401fffffffffffff bfa00000000100fe
|
||||
37feffffffffffff 47ef8000000fffff 3ffe8400000f7ffe
|
||||
b80f800001fffffe 44e00000ffff7fff bcff8001f9ff041c
|
||||
0010000000000000 434ffffffffffffe 036ffffffffffffe
|
||||
41ffffdfffff8000 7fe0000000000001 7ff0000000000000
|
||||
b80a16ad02c87cd3 380fffffffffe7fe b02a16ad02c86940
|
||||
47f0fffffffffffb 7ffc000000000000 7ffc000000000000
|
||||
0010000000000000 41ffffffffbfff7f 021fffffffbfff7f
|
||||
0010000000000000 8000000000000000 8000000000000000
|
||||
c3d00001000001ff b7f60cb3edb38762 3bd60cb54e7ec8fd
|
||||
0010000000000000 8010000000000001 8000000000000000
|
||||
43c0007fffdfffff 801ffffffffffffe 83f0007fffdffffe
|
||||
c7efffffdffffbff bca0000000000001 449fffffdffffc01
|
||||
0010000000000000 c11ff00000000003 813ff00000000003
|
||||
0010000000000000 bfd0000000000000 8000000000000000
|
||||
c0ffffffffeffffe bfdfffffffffffff 40efffffffeffffd
|
||||
6f7000000001fdff 1510010000000fff 4490010000020e1e
|
||||
37f002000000000f b1effcfffffffffe a9f0007fd000000e
|
||||
cc3050bc013d7cd7 bff0000000000000 4c3050bc013d7cd7
|
||||
0010000000000000 87fff0000000fffe 8000000000000000
|
||||
0010000000000000 bffffffffffffffe 801ffffffffffffe
|
||||
43effbfffffff7ff 7fefffffff801ffe 7ff0000000000000
|
||||
c015834380f2b995 3f9fff0000000400 bfc5829766d6b4b0
|
||||
0010000000000000 41dfffffc0001000 01ffffffc0001000
|
||||
0010000000000000 c01fffffffffffff 803fffffffffffff
|
||||
41e010000000001f c5b04000000fffff c7a050400010101e
|
||||
3b40018000000000 3ea0400000000100 39f0418600000100
|
||||
0010000000000000 4cdffeffff7fffff 0cfffeffff7fffff
|
||||
16dff0001ffffffe 3fb500ae0796659d 16a4f62dc5934870
|
||||
b7e003ffffffff7f deafffffeffffffd 56a003fff7fdff7d
|
||||
406000001fffbfff 3f20020000080000 3f900200200bbff7
|
||||
0010000000000000 7ffc000000000000 7ffc000000000000
|
||||
439fbffffffbffff bf8454fd38ef0ba0 c3342c533e7aa2e8
|
||||
c1c000000200007e bf000001ffffffbf 40d000020200007d
|
||||
480000000008fffe 001637e790e69de2 082637e790f31d51
|
||||
bffffffc000003fe 3ca0000000000001 bcaffffc00000400
|
||||
6b4848a9a8c0dcd5 480ffffffffbdfff 736848a9a8bdbb76
|
1
wally-pipelined/src/fpu/FMA/tbgen/results.dat
Normal file
1
wally-pipelined/src/fpu/FMA/tbgen/results.dat
Normal file
@ -0,0 +1 @@
|
||||
0020000803ffffff bfcb4181a9468e24 000fffffffffffff 7fe2f9c2bca0f33c 00092f9c2bca0f33 Wrong zdenorm 18
|
BIN
wally-pipelined/src/fpu/FMA/tbgen/tb
Executable file
BIN
wally-pipelined/src/fpu/FMA/tbgen/tb
Executable file
Binary file not shown.
116
wally-pipelined/src/fpu/FMA/tbgen/tb.c
Normal file
116
wally-pipelined/src/fpu/FMA/tbgen/tb.c
Normal file
@ -0,0 +1,116 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Testbench generator for the FMA unit.
 *
 * Copies tbhead.v to tb.v, then for each test vector read from "testFloat"
 * (at most MAX_LINES lines) appends Verilog that drives the operands, waits,
 * and logs any mismatch between the result `w` and the expected answer.
 * Finally closes the generated initial block and module.
 *
 * Expected input line layout (hex digits, one separator character between
 * fields):
 *   columns  0-15  x operand
 *   columns 17-32  y operand
 *   columns 34-49  z operand
 *   columns 51-66  expected result
 * Anything after column 66 is ignored. Lines too short to hold all four
 * fields are reported on stderr and skipped.
 *
 * Returns 0 on success, 1 if tb.v could not be created, a file could not be
 * opened, or the output could not be flushed.
 */
int main(void) {
    enum {
        MAX_LINES  = 91,   /* upper bound on input lines consumed */
        HEX_DIGITS = 16,   /* 64-bit value printed as hex */
        X_COL      = 0,
        Y_COL      = 17,
        Z_COL      = 34,
        ANS_COL    = 51
    };

    FILE *fp, *fq;
    int cnt = 0;            /* number of checks emitted so far */
    int k;
    char *ln = NULL;        /* buffer owned and grown by getline() */
    size_t nbytes = 0;
    char xrf[HEX_DIGITS + 1];
    char y[HEX_DIGITS + 1];
    char zrf[HEX_DIGITS + 1];
    char ans[HEX_DIGITS + 1];

    /* Recreate tb.v from the header BEFORE opening it for append, so the
       stream never refers to a file that is about to be replaced. */
    if (system("cp tbhead.v tb.v") != 0) {
        fprintf(stderr, "tbgen: could not copy tbhead.v to tb.v\n");
        return 1;
    }

    // fp = fopen("tb.dat","r");
    fp = fopen("testFloat","r");
    if (fp == NULL) {
        perror("testFloat");
        return 1;
    }
    fq = fopen("tb.v","a");
    if (fq == NULL) {
        perror("tb.v");
        fclose(fp);
        return 1;
    }

    for (k = 0; k < MAX_LINES; k++) {
        //3FDBFFFFFFFFFF7F DE608000000001FF 43CFED83C17EDBD0 DE4CE000000002F9 01
        // b68ffff8000000ff_3f9080000007ffff_b6307ffbe0080080_00001
        long len = (long)getline(&ln, &nbytes, fp);
        if (len < 0) break;   /* EOF or read error */

        /* Drop the line terminator so the length check sees payload only. */
        while (len > 0 && (ln[len - 1] == '\n' || ln[len - 1] == '\r'))
            ln[--len] = '\0';

        if (len < ANS_COL + HEX_DIGITS) {
            fprintf(stderr, "tbgen: skipping malformed line %d\n", k + 1);
            continue;
        }

        memcpy(xrf, ln + X_COL,   HEX_DIGITS); xrf[HEX_DIGITS] = 0;
        memcpy(y,   ln + Y_COL,   HEX_DIGITS); y[HEX_DIGITS]   = 0;
        memcpy(zrf, ln + Z_COL,   HEX_DIGITS); zrf[HEX_DIGITS] = 0;
        memcpy(ans, ln + ANS_COL, HEX_DIGITS); ans[HEX_DIGITS] = 0;

        /* Operands and expected answer */
        fprintf(fq," xrf = 64'h%s;\n",xrf);
        fprintf(fq," y = 64'h%s;\n",y);
        fprintf(fq," zrf = 64'h%s;\n",zrf);
        fprintf(fq," ans = 64'h%s;\n", ans);
        // fprintf(fq," flags = 5'h%s;\n", flags);

        /* rn=1, rz=rm=rp=0 for every vector */
        fprintf(fq," rn = %d;\n",1);
        fprintf(fq," rz = %d;\n", 0);
        fprintf(fq," rm = %d;\n", 0);
        fprintf(fq," rp = %d;\n", 0);

        /* Early-result and bypass inputs are held at zero */
        fprintf(fq," earlyres = 64'b0;\n");
        fprintf(fq," earlyressel = 0;\n");
        fprintf(fq," bypsel= 2'b0;\n"); //, bysel);
        fprintf(fq," bypplus1 = 0;\n"); //, byp1);
        fprintf(fq," byppostnorm = 0;\n"); //, bypnorm);

        fprintf(fq,"#10\n");

        // IEEE 754-2008 section 6.3 states "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
        //fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h \",xrf,y,w, ans);\n");
        fprintf(fq," // IEEE 754-2008 section 6.3 states: \"When ether an input or result is NaN, this\n");
        fprintf(fq," // standard does not interpret the sign of a NaN.\"\n");
        /* Negative signalling NaNs lie strictly between FFF0... and FFF8...;
           the lower bound used to be FFF8..., which made the range empty. */
        fprintf(fq," nan = (w > 64'h7FF0000000000000 && w < 64'h7FF8000000000000) ||\n");
        fprintf(fq," (w > 64'hFFF0000000000000 && w < 64'hFFF8000000000000 ) ||\n");
        fprintf(fq," (w >= 64'h7FF8000000000000 && w <= 64'h7FFfffffffffffff ) ||\n");
        fprintf(fq," (w >= 64'hFFF8000000000000 && w <= 64'hFFFfffffffffffff );\n");
        // fprintf(fq," if(!(~(|xrf[62:52]) && |xrf[51:0] || ~(|y[62:52]) && |y[51:0])) begin\n");
        // not looking at negative zero results right now
        //fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) && !(w == 64'h8000000000000000 && ans == 64'b0)) begin\n");
        /* NaN results are compared without the sign bit; everything else exactly. */
        fprintf(fq," if( (nan && (w[62:0] != ans[62:0])) || (!nan && (w != ans)) ) begin\n");
        fprintf(fq," $fwrite(fp, \"%%h %%h %%h %%h %%h Wrong \",xrf,y, zrf, w, ans);\n");
        fprintf(fq," if(w == 64'h8000000000000000) $fwrite(fp, \"w=-zero \");\n");
        fprintf(fq," if(~(|xrf[62:52]) && |xrf[51:0]) $fwrite(fp, \"xdenorm \");\n");
        fprintf(fq," if(~(|y[62:52]) && |y[51:0]) $fwrite(fp, \"ydenorm \");\n");
        fprintf(fq," if(~(|zrf[62:52]) && |zrf[51:0]) $fwrite(fp, \"zdenorm \");\n");
        fprintf(fq," if(invalid != 0) $fwrite(fp, \"invld \");\n");
        fprintf(fq," if(overflow != 0) $fwrite(fp, \"ovrflw \");\n");
        fprintf(fq," if(underflow != 0) $fwrite(fp, \"unflw \");\n");
        fprintf(fq," if(w == 64'hFFF0000000000000) $fwrite(fp, \"w=-inf \");\n");
        fprintf(fq," if(w == 64'h7FF0000000000000) $fwrite(fp, \"w=+inf \");\n");
        fprintf(fq," if(w > 64'h7FF0000000000000 && w < 64'h7FF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
        fprintf(fq," if(w > 64'hFFF0000000000000 && w < 64'hFFF8000000000000 ) $fwrite(fp, \"w=sigNaN \");\n");
        fprintf(fq," if(w >= 64'h7FF8000000000000 && w <= 64'h7FFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");
        fprintf(fq," if(w >= 64'hFFF8000000000000 && w <= 64'hFFFfffffffffffff ) $fwrite(fp, \"w=qutNaN \");\n");

        fprintf(fq," if(ans == 64'hFFF0000000000000) $fwrite(fp, \"ans=-inf \");\n");
        fprintf(fq," if(ans == 64'h7FF0000000000000) $fwrite(fp, \"ans=+inf \");\n");
        fprintf(fq," if(ans > 64'h7FF0000000000000 && ans < 64'h7FF8000000000000 ) $fwrite(fp, \"ans=sigNaN \");\n");
        fprintf(fq," if(ans > 64'hFFF0000000000000 && ans < 64'hFFF8000000000000 ) $fwrite(fp, \"ans=sigNaN \");\n");
        fprintf(fq," if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $fwrite(fp, \"ans=qutNaN \");\n");
        fprintf(fq," if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $fwrite(fp, \"ans=qutNaN \");\n");
        /* Tag the log line with the index of the failing vector. */
        fprintf(fq," $fwrite(fp,\"%d\\n\");\n",cnt);
        /* Debug stop on one specific vector; only reachable if MAX_LINES is raised above 358. */
        if(cnt == 358)fprintf(fq," $stop;\n");
        // fprintf(fq," end\n");
        fprintf(fq," end\n");
        cnt++;

        //if(cnt > 100) break;
        fflush(fq);
    }

    /* Close the initial block and the module opened in tbhead.v. */
    fprintf(fq, "\t$stop;\n\tend\nendmodule");

    free(ln);
    fclose(fp);
    if (fclose(fq) != 0) {
        perror("tb.v");
        return 1;
    }
    return 0;
}
|
||||
|
3952
wally-pipelined/src/fpu/FMA/tbgen/tb.v
Normal file
3952
wally-pipelined/src/fpu/FMA/tbgen/tb.v
Normal file
File diff suppressed because it is too large
Load Diff
BIN
wally-pipelined/src/fpu/FMA/tbgen/tbgen
Executable file
BIN
wally-pipelined/src/fpu/FMA/tbgen/tbgen
Executable file
Binary file not shown.
36
wally-pipelined/src/fpu/FMA/tbgen/tbhead.v
Normal file
36
wally-pipelined/src/fpu/FMA/tbgen/tbhead.v
Normal file
@ -0,0 +1,36 @@
|
||||
// Fixed header of the generated FMA testbench.
// This file is intentionally left open (no closing end/endmodule): tb.c copies
// it to tb.v and appends the per-vector stimulus followed by "$stop; end endmodule".
`timescale 1 ns/10 ps
|
||||
module tb;
|
||||
|
||||
|
||||
// Operands and expected answer; assigned once per test vector by the generated stimulus.
reg [63:0] xrf;
|
||||
reg [63:0] y;
|
||||
reg [63:0] zrf;
|
||||
reg [63:0] ans;
|
||||
// Presumably rounding-mode selects (TODO confirm against fmac); the generator drives rn=1 and the rest 0.
reg rn;
|
||||
reg rz;
|
||||
reg rm;
|
||||
reg rp;
|
||||
// Early-result and bypass controls; the generated stimulus holds all of them at zero.
reg [63:0] earlyres;
|
||||
reg earlyressel;
|
||||
reg [1:0] bypsel;
|
||||
reg bypplus1;
|
||||
reg byppostnorm;
|
||||
// Outputs of the unit under test; w is the value compared against ans.
wire [63:0] w;
|
||||
wire [63:0] wbypass;
|
||||
wire invalid;
|
||||
wire overflow;
|
||||
wire underflow;
|
||||
wire inexact;
|
||||
|
||||
// File handle for the mismatch log opened below.
integer fp;
|
||||
// Set by the generated checks when w lies in a NaN encoding range.
reg nan;
|
||||
|
||||
// NOTE(review): period is not referenced in this header; the generated stimulus uses a literal #10 delay.
localparam period = 20;
|
||||
// Unit under test: every testbench reg/wire is connected to the fmac port of the same name.
fmac UUT(.xrf(xrf), .y(y), .zrf(zrf), .rn(rn), .rz(rz), .rp(rp), .rm(rm),
|
||||
.earlyres(earlyres), .earlyressel(earlyressel), .bypsel(bypsel), .bypplus1(bypplus1), .byppostnorm(byppostnorm),
|
||||
.w(w), .wbypass(wbypass), .invalid(invalid), .overflow(overflow), .underflow(underflow), .inexact(inexact));
|
||||
|
||||
|
||||
// Start of the stimulus block; its body and terminator are appended by the generator.
initial
|
||||
begin
|
||||
// NOTE(review): absolute, user-specific path; the log is written here regardless of the
// simulator's working directory. Consider a relative path.
fp = $fopen("/home/kparry/code/FMAC/tbgen/results.dat","w");
|
1000
wally-pipelined/src/fpu/FMA/tbgen/testMini
Normal file
1000
wally-pipelined/src/fpu/FMA/tbgen/testMini
Normal file
File diff suppressed because it is too large
Load Diff
1
wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
Executable file
1
wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
Executable file
@ -0,0 +1 @@
|
||||
# Append double-precision mulAdd test vectors (round-to-nearest-even, level 1)
# to the file testFloat. Output is appended, so remove testFloat first if a
# fresh set is wanted.
vector_count=6133248
rng_seed=113355
testfloat_gen f64_mulAdd -n "$vector_count" -rnear_even -seed "$rng_seed" -level 1 >> testFloat
|
1
wally-pipelined/src/fpu/FMA/tbgen/tstFlMult.awk
Executable file
1
wally-pipelined/src/fpu/FMA/tbgen/tstFlMult.awk
Executable file
@ -0,0 +1 @@
|
||||
# From testFloat, keep the lines whose third field is all zeros, print their
# first, second and fourth fields joined by "_", and store the first 1000
# matches in testMini.
awk -v OFS=_ '$3 == "0000000000000000" { print $1, $2, $4 }' testFloat |
    head -n 1000 > testMini
|
Loading…
Reference in New Issue
Block a user