forked from Github_Repos/cvw
Almost all convert instructions pass Imperas tests
This commit is contained in:
parent
20f2a4e47c
commit
36f59f3c99
@ -46,7 +46,7 @@
|
||||
`define MEM_DCACHE 0
|
||||
`define MEM_DTIM 1
|
||||
`define MEM_ICACHE 0
|
||||
`define MEM_VIRTMEM 0\1
|
||||
`define MEM_VIRTMEM 1
|
||||
`define VECTORED_INTERRUPTS_SUPPORTED 1
|
||||
|
||||
`define ITLB_ENTRIES 32
|
||||
@ -56,10 +56,7 @@
|
||||
`define PMP_ENTRIES 16
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 64'h0000000080000000
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
`define RESET_VECTOR 64'h80000000
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
@ -84,6 +81,9 @@
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
|
@ -8,7 +8,7 @@ add wave /testbench/clk
|
||||
add wave /testbench/reset
|
||||
add wave -divider
|
||||
#add wave /testbench/dut/hart/ebu/IReadF
|
||||
add wave /testbench/dut/hart/DataStall
|
||||
#add wave /testbench/dut/hart/DataStall
|
||||
add wave /testbench/dut/hart/ICacheStallF
|
||||
add wave /testbench/dut/hart/StallF
|
||||
add wave /testbench/dut/hart/StallD
|
||||
|
@ -1,65 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Block Name: add.v
|
||||
// Author: David Harris
|
||||
// Date: 11/12/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the addition of the product and addend. It also
|
||||
// contains logic necessary to adjust the signs for effective subtracts
|
||||
// and negative results.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
module add(
  input  logic [105:0] rM,         // partial product 1
  input  logic [105:0] sM,         // partial product 2
  input  logic [163:0] tM,         // aligned addend
  output logic [163:0] sum,        // selected sum, negated when negsum is set
  input  logic         negsum,     // negate the selected sum
  input  logic         invz,       // invert the addend
  input  logic         selsum1,    // select the +1 result of the compound adder
  output logic         negsum0,    // bit 164 of the +0 result
  output logic         negsum1,    // bit 164 of the +1 result
  input  logic         killprodM   // zero out the product
);
// Adds the product (held as two partial products rM and sM) to the aligned
// addend tM.  Two candidate results are produced, addend+product and
// addend+product+1, and selsum1/negsum pick and optionally negate one of them.

  // Internal nodes
  logic [105:0] prodR;             // rM, or zero when killprodM is set
  logic [105:0] prodS;             // sM, or zero when killprodM is set
  logic [164:0] addend;            // {1'b0, tM}, bitwise inverted when invz is set
  logic [164:0] prodAligned;       // (prodR + prodS) placed at bits [107:2]
  logic [164:0] sum0;              // +0 result
  logic [164:0] sum1;              // +1 result

  // Invert the addend when requested.  The +1 that completes a two's
  // complement negation is supplied by the +1 result below.
  assign addend = {1'b0, tM} ^ {165{invz}};

  // Zero out the product when killprodM is set
  assign prodR = killprodM ? 106'b0 : rM;
  assign prodS = killprodM ? 106'b0 : sM;

  //***replace this with a more structural cpa that synthesizes better
  // Compound adder.
  // prodR + prodS sits inside a concatenation, so it is evaluated at 106 bits
  // and any carry out of bit 105 is discarded.
  assign prodAligned = {57'b0, prodR + prodS, 2'b0};
  assign sum0 = addend + prodAligned;
  assign sum1 = addend + prodAligned + 165'd1;   // +1 from invert of z above

  // Bit 164 of each result is reported as its sign
  assign negsum0 = sum0[164];
  assign negsum1 = sum1[164];

  // 4:1 mux: choose +0/+1 result and apply the optional negation
  assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0])
                       : (negsum ? -sum0[163:0] : sum0[163:0]);

endmodule
|
||||
|
@ -1,88 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: align.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the alignment shifter. It is responsible for
|
||||
// adjusting the fraction portion of the addend relative to the fraction
|
||||
// produced in the multiplier array.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module align(
  input  logic [51:0]  zman,           // Fraction of addend z
  input  logic [12:0]  aligncntE,      // amount to shift (compared as a signed value)
  input  logic         xzeroE,         // Input X = 0
  input  logic         yzeroE,         // Input Y = 0
  input  logic         zzeroE,         // Input Z = 0
  input  logic         zdenormE,       // Input Z is denormalized
  output logic [163:0] tE,             // aligned addend
  output logic         bsE,            // sticky bit of addend (OR of the bits shifted below tE)
  output logic         killprodE,      // product is to be discarded
  output logic [8:0]   sumshiftE,      // right-shift amount applied to the addend
  output logic         sumshiftzeroE   // set only in the saturated addend-anchored case
);
// Alignment shifter: places {leading bit, zman} at the top of a 216-bit word,
// shifts it right by an amount derived from aligncntE, and splits the result
// into the aligned addend tE (upper 164 bits) and a sticky bit (lower 52 bits).

  // Internal nodes
  logic [215:0] shift;                 // aligned addend from shifter
  logic [12:0]  tmp;                   // 57 - aligncntE

  always_comb begin
    // Defaults.  tmp is given a value on every path so that this block
    // describes pure combinational logic (no latch is inferred for it).
    killprodE = xzeroE | yzeroE;
    tmp       = 13'd57 - aligncntE;

    // d = aligncntE
    // p = 53
    //***try reducing this hardware to use one shifter
    if ($signed(aligncntE) <= $signed(-(13'd105))) begin   // d <= -2p+1
      // product anchored case with saturated shift
      // NOTE(review): this branch forces the leading bit to 1 while the other
      // branches use ~zdenormE - confirm this is intended for denormal z.
      sumshiftE     = 163;                                  // 3p+4
      sumshiftzeroE = 0;
      shift         = {1'b1, zman, 163'b0} >> sumshiftE;

    end else if ($signed(aligncntE) <= $signed(13'd55)) begin   // -2p+1 < d <= p+2
      // product anchored / cancellation (d <= 2) and addend anchored
      // (2 < d <= p+2) cases share the same datapath: shift by 57 - d
      sumshiftE     = tmp[8:0];
      sumshiftzeroE = 0;
      shift         = {~zdenormE, zman, 163'b0} >> sumshiftE;

    end else begin                                          // d >= p+3
      // addend anchored case with saturated shift: no shift, product discarded
      sumshiftE     = 0;
      sumshiftzeroE = 1;
      shift         = {~zdenormE, zman, 163'b0} >> sumshiftE;
      killprodE     = 1;
    end

    // Common to every case: upper bits form the addend (zero if Z = 0),
    // lower bits collapse into the sticky bit.
    tE  = zzeroE ? 0 : shift[215:52];
    bsE = |(shift[51:0]);
  end

endmodule
|
||||
|
@ -1,53 +0,0 @@
|
||||
module booth(
  input  logic [53:0] xExt,    // multiplicand
  input  logic [2:0]  choose,  // 3-bit selector picking the encoding
  output logic [1:0]  add1,    // correction value that accompanies pp
  output logic        e,       // set for the negative encodings (choose[2] = 1)
  output logic [54:0] pp       // the resulting encoding of xExt
);
// Selects one of the encodings 0, +x, +2x, -2x, -x, -0 of the multiplicand.
// Negative encodings are emitted as bitwise inversions; add1 carries the
// value listed alongside each one in the table below.

  // All three outputs depend only on `choose`, so a single table drives them.
  always_comb
    case (choose)
      3'b000:         begin pp = 55'b0;           e = 0; add1 = 2'b00; end  //  0
      3'b001, 3'b010: begin pp = {1'b0, xExt};    e = 0; add1 = 2'b00; end  //  1
      3'b011:         begin pp = {xExt, 1'b0};    e = 0; add1 = 2'b00; end  //  2
      3'b100:         begin pp = {~xExt, 1'b0};   e = 1; add1 = 2'b10; end  // -2
      3'b101, 3'b110: begin pp = {1'b1, ~xExt};   e = 1; add1 = 2'b01; end  // -1
      3'b111:         begin pp = '1;              e = 1; add1 = 2'b01; end  // -0
    endcase

endmodule
|
@ -1,90 +0,0 @@
|
||||
module add3comp2 #(parameter BITS = 4) (
  input  logic [BITS-1:0] a,
  input  logic [BITS-1:0] b,
  input  logic [BITS-1:0] c,
  output logic [BITS-1:0] carry,   // per-bit carry outputs (not shifted here)
  output logic [BITS-1:0] sum      // per-bit sum outputs
);
// BITS-wide 3:2 compressor: one independent sng3comp2 cell per bit position.
// look into different implementations of the compressors?

  genvar i;

  generate
    for (i = 0; i < BITS; i = i + 1) begin
      sng3comp2 add0(.a(a[i]), .b(b[i]), .c(c[i]),
                     .carry(carry[i]), .sum(sum[i]));
    end
  endgenerate

endmodule
|
||||
|
||||
module add4comp2 #(parameter BITS = 4) (
  input  logic [BITS-1:0] a,
  input  logic [BITS-1:0] b,
  input  logic [BITS-1:0] c,
  input  logic [BITS-1:0] d,
  output logic [BITS:0]   carry,   // one bit wider than the operands
  output logic [BITS-1:0] sum
);
// BITS-wide 4:2 compressor built from a chain of sng4comp2 cells.  Each
// cell's cout feeds the cin of the next cell up; the bottom cell gets cin = 0.

  logic [BITS-1:0] cout;       // cell-to-cell chain signals
  logic            carryTmp;   // carry output of the top cell
  genvar i;

  // Bottom cell: nothing below it, so cin is tied low
  sng4comp2 add0(.a(a[0]), .b(b[0]), .c(c[0]), .d(d[0]), .cin(1'b0),
                 .cout(cout[0]), .carry(carry[0]), .sum(sum[0]));

  // Middle cells 1 .. BITS-2
  generate
    for (i = 1; i < BITS-1; i = i + 1) begin
      sng4comp2 add1(.a(a[i]), .b(b[i]), .c(c[i]), .d(d[i]), .cin(cout[i-1]),
                     .cout(cout[i]), .carry(carry[i]), .sum(sum[i]));
    end
  endgenerate

  // Top cell: its carry is combined with its cout to form the two upper carry bits
  sng4comp2 add2(.a(a[BITS-1]), .b(b[BITS-1]), .c(c[BITS-1]), .d(d[BITS-1]),
                 .cin(cout[BITS-2]),
                 .cout(cout[BITS-1]), .carry(carryTmp), .sum(sum[BITS-1]));

  // NOTE(review): if carry[k] carries weight 2^(k+1) like the lower bits, the
  // XOR would be expected on carry[BITS-1] and the AND on carry[BITS]; confirm
  // against how the instantiating module consumes these two bits.
  assign carry[BITS-1] = carryTmp & cout[BITS-1];
  assign carry[BITS]   = carryTmp ^ cout[BITS-1];

endmodule
|
||||
|
||||
module sng3comp2(
  input  logic a,
  input  logic b,
  input  logic c,
  output logic carry,
  output logic sum
);
// Single-bit 3:2 compressor (full adder): {carry, sum} = a + b + c.
// look into different implementations of the compressors?

  logic differ;                       // a and b disagree

  assign differ = a ^ b;

  // When a and b disagree the carry follows c; when they agree it equals a.
  assign carry  = differ ? c : a;
  assign sum    = differ ^ c;

endmodule
|
||||
|
||||
module sng4comp2(
  input  logic a,
  input  logic b,
  input  logic c,
  input  logic d,
  input  logic cin,     // chain input from the neighbouring cell
  output logic cout,    // chain output: carry of the first 3:2 stage
  output logic carry,   // carry of the second 3:2 stage
  output logic sum      // sum of the second 3:2 stage
);
// Single-bit 4:2 compressor made of two cascaded 3:2 compressors.
// look into pass gate 4:2 counters?

  logic TmpSum;         // sum of a, b, c out of the first stage

  // Stage 1 compresses a, b, c
  sng3comp2 add1(.a(a), .b(b), .c(c), .carry(cout), .sum(TmpSum));

  // Stage 2 compresses the stage-1 sum with d and cin
  sng3comp2 add2(.a(TmpSum), .b(d), .c(cin), .carry(carry), .sum(sum));

endmodule
|
@ -1,140 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen(
  input  logic [62:52] xexp,          // Exponent of multiplicand x
  input  logic [62:52] yexp,          // Exponent of multiplicand y
  input  logic [62:52] zexp,          // Exponent of addend z
  input  logic         killprod,      // Z >> product (not used in this module)
  input  logic         sumzero,       // sum exactly equals zero
  input  logic         resultdenorm,  // result is denormalized
  input  logic [8:0]   normcnt,       // normalization shift count (not used in this module)
  input  logic         infinity,      // generate infinity (rather than max exponent) on overflow
  input  logic [4:0]   FmaFlagsM,     // flags; bits 4, 2 and 1 steer the special result
  input  logic         inf,           // infinity indication
  input  logic         xzero,         // X is zero
  input  logic         yzero,         // Y is zero
  input  logic         expplus1,      // add one to the result exponent
  input  logic         nan,           // Some input is NaN
  input  logic [12:0]  de0,           // exponent before the zero/denormal override
  input  logic         xnan,          // X is NaN
  input  logic         ynan,          // Y is NaN
  input  logic         znan,          // Z is NaN
  input  logic         xdenorm,       // X is denorm
  input  logic         ydenorm,       // Y is denorm
  input  logic         zdenorm,       // Z is denorm
  input  logic         proddenorm,    // product is denorm (not used in this module)
  input  logic         specialsel,    // Select special result
  input  logic         zexpsel,       // (not used in this module)
  output logic [12:0]  aligncnt,      // shift count for alignment shifter
  output logic [62:52] wexp,          // Exponent of result
  output logic         prodof,        // X*Y exponent out of bounds
  output logic         sumof,         // X*Y+Z exponent out of bounds
  output logic         sumuf,         // X*Y+Z exponent underflows
  output logic         denorm0,       // de0 is exactly zero
  output logic [12:0]  ae             // exponent of multiply
);
// Exponent path of the FMAC: product exponent, alignment shift count,
// out-of-range checks and the final/special result exponent mux.

  // Internal nodes
  logic [12:0] de;            // exponent after the zero/denormal override
  logic [10:0] infinityres;   // Infinity or max number
  logic [10:0] nanres;        // NaN propagated or generated
  logic [10:0] specialres;    // Exceptional case result

  // Compute exponent of multiply (forced to zero when either factor is zero)
  // and report if it is out of bounds.
  assign ae     = (xzero | yzero) ? 0 : xexp + yexp - 1023;
  assign prodof = (ae > 2046 && ~ae[12]);

  // Compute alignment shift count, with a one-bit correction for each
  // operand that is not denormalized.
  assign aligncnt = zexp - ae - 1 + {12'b0,~xdenorm} + {12'b0,~ydenorm} - {12'b0,~zdenorm};

  // If the exponent becomes exactly zero (denormalized)
  // signal such to adjust R bit before rounding
  assign denorm0 = (de0 == 0);

  // Check for exponent out of bounds after add
  assign de    = (resultdenorm | sumzero) ? 0 : de0;
  assign sumof = ~de[12] && de > 2046;
  assign sumuf = de == 0 && ~sumzero && ~resultdenorm;

  // Special-result mux, in priority order: invalid/NaN, overflow, infinity,
  // underflow.  Overflow produces either the infinity exponent or the largest
  // finite exponent depending on `infinity`.
  assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  //  identical to the payload of one of the input NaNs if representable in the destination
  //  format. This standard does not specify which of the input NaNs will provide the payload."
  assign nanres = xnan ? xexp : (ynan ? yexp : (znan ? zexp : 11'b11111111111));

  assign specialres = (FmaFlagsM[4] | nan) ? nanres :        // invalid
                      FmaFlagsM[2]         ? infinityres :   // overflow
                      inf                  ? 11'b11111111111 :
                      FmaFlagsM[1]         ? 11'b0 : 11'bx;  // underflow

  // A mux selects the special result or the normal FMAC exponent.
  assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1;

endmodule
|
||||
|
@ -1,90 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen1(
  input  logic [62:52] xexp,        // Exponent of multiplicand x
  input  logic [62:52] yexp,        // Exponent of multiplicand y
  input  logic [62:52] zexp,        // Exponent of addend z
  input  logic         xzeroE,      // X is zero
  input  logic         yzeroE,      // Y is zero
  input  logic         xdenormE,    // X is denorm
  input  logic         ydenormE,    // Y is denorm
  input  logic         zdenormE,    // Z is denorm
  output logic [12:0]  aligncntE,   // shift count for alignment shifter
  output logic         prodof,      // X*Y exponent out of bounds
  output logic [12:0]  aeE          // exponent of multiply
);
// First part of the FMAC exponent path: product exponent, its out-of-bounds
// check, and the alignment shift count.

  // Compute exponent of multiply (forced to zero when either factor is zero)
  assign aeE = (xzeroE | yzeroE) ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;

  // Report if the product exponent is out of bounds: above 2046 with bit 12 clear
  assign prodof = (aeE > 2046 && ~aeE[12]);

  // Compute alignment shift count, with a one-bit correction for each
  // operand that is not denormalized.
  assign aligncntE = {2'b0,zexp} - aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};

endmodule
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: expgen.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements the exponent path of the FMAC. It performs the
|
||||
// following operations:
|
||||
//
|
||||
// 1) Compute exponent of multiply.
|
||||
// 2) Compare multiply and add exponents to generate alignment shift count
|
||||
// 3) Adjust exponent based on normalization
|
||||
// 4) Increment exponent based on postrounding renormalization
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module expgen2(
  input  logic [62:52] xexp,          // Exponent of multiplicand x
  input  logic [62:52] yexp,          // Exponent of multiplicand y
  input  logic [62:52] zexp,          // Exponent of addend z
  input  logic         sumzero,       // sum exactly equals zero
  input  logic         resultdenorm,  // result is denormalized
  input  logic         infinity,      // generate infinity (rather than max exponent) on overflow
  input  logic [4:0]   FmaFlagsM,     // flags; bits 4, 2 and 1 steer the special result
  input  logic         inf,           // infinity indication
  input  logic         expplus1,      // add one to the result exponent
  input  logic         nanM,          // Some input is NaN
  input  logic [12:0]  de0,           // exponent before the zero/denormal override
  input  logic         xnanM,         // X is NaN
  input  logic         ynanM,         // Y is NaN
  input  logic         znanM,         // Z is NaN
  input  logic         specialsel,    // Select special result
  output logic [62:52] wexp,          // Exponent of result
  output logic         sumof,         // X*Y+Z exponent out of bounds
  output logic         sumuf          // X*Y+Z exponent underflows
);
// Second part of the FMAC exponent path: post-add range checks and the
// final/special result exponent mux.

  // Internal nodes
  logic [12:0] de;            // exponent after the zero/denormal override
  logic [10:0] infinityres;   // Infinity or max number
  logic [10:0] nanres;        // NaN propagated or generated
  logic [10:0] specialres;    // Exceptional case result

  // Check for exponent out of bounds after add
  assign de    = (resultdenorm | sumzero) ? 0 : de0;
  assign sumof = ~de[12] && de > 2046;
  assign sumuf = de == 0 && ~sumzero && ~resultdenorm;

  // Special-result mux, in priority order: invalid/NaN, overflow, infinity,
  // underflow.  Overflow produces either the infinity exponent or the largest
  // finite exponent depending on `infinity`.
  assign infinityres = infinity ? 11'b11111111111 : 11'b11111111110;

  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  //  identical to the payload of one of the input NaNs if representable in the destination
  //  format. This standard does not specify which of the input NaNs will provide the payload."
  assign nanres = xnanM ? xexp : (ynanM ? yexp : (znanM ? zexp : 11'b11111111111));

  assign specialres = (FmaFlagsM[4] | nanM) ? nanres :        // invalid
                      FmaFlagsM[2]          ? infinityres :   // overflow
                      inf                   ? 11'b11111111111 :
                      FmaFlagsM[1]          ? 11'b0 : 11'bx;  // underflow

  // A mux selects the special result or the normal FMAC exponent.
  assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1};

endmodule
|
||||
|
||||
|
@ -1,88 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag(
  input  logic       xnan,       // X is NaN
  input  logic       ynan,       // Y is NaN
  input  logic       znan,       // Z is NaN
  input  logic       xinf,       // X is Inf
  input  logic       yinf,       // Y is Inf
  input  logic       zinf,       // Z is Inf
  input  logic       prodof,     // X*Y overflows exponent
  input  logic       sumof,      // X*Y + Z overflows exponent
  input  logic       sumuf,      // X*Y + Z underflows exponent
  input  logic       psign,      // Sign of product
  input  logic       zsign,      // Sign of z
  input  logic       xzero,      // x = 0
  input  logic       yzero,      // y = 0
  input  logic       zzero,      // z = 0
  input  logic [1:0] vbits,      // R and S bits of result
  input  logic       killprod,   // product discarded
  output logic       inf,        // Some source is Inf, or the sum overflowed
  output logic       nan,        // Some source is NaN
  output logic [4:0] FmaFlagsM,  // {invalid, divide-by-zero, overflow, underflow, inexact}
  input  logic       sticky      // sticky bit of result
);
// Generates the flags: invalid, overflow, underflow, inexact.

  // Internal nodes
  logic srcinf;    // some input operand is Inf
  logic prodinf;   // X*Y larger than max possible
  logic suminf;    // X*Y+Z larger than max possible

  // If any input is NaN, propagate the NaN
  assign nan = xnan || ynan || znan;

  // Generate infinity checks
  assign srcinf  = xinf || yinf || zinf;
  assign prodinf = prodof && ~xnan && ~ynan;
  assign suminf  = sumof && ~xnan && ~ynan && ~znan;

  // Same with infinity (inf - inf and 0 * inf don't propagate inf
  // but it's ok because illegal op takes higher precedence).
  // An overflowed sum is also reported here.
  assign inf = srcinf || suminf;

  // Set invalid flag for following cases:
  //   1) Inf - Inf
  //   2) 0 * Inf
  assign FmaFlagsM[4] = ((xinf || yinf || prodinf) && zinf && (psign ^ zsign)) ||
                        (xzero && yinf) || (yzero && xinf);

  assign FmaFlagsM[3] = 0;   // divide by zero flag

  // Set the overflow flag when the sum exponent is out of bounds and no input
  // was already infinite.  This must be masked with srcinf rather than inf:
  // inf contains suminf, so suminf && ~inf could never be true.
  assign FmaFlagsM[2] = suminf && ~srcinf;

  // Set the underflow flag
  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) ||
                        (killprod & zzero & ~(yzero | xzero));

  // Set the inexact flag if the R or S bit or sticky is set, or on overflow,
  // unless an input was Inf or NaN.  Masked with srcinf for the same reason
  // as the overflow flag, so that overflow also reports inexact.
  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) && ~(srcinf || nan);

endmodule
|
@ -1,34 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module flag1(
  input  logic xnanE,      // X is NaN
  input  logic ynanE,      // Y is NaN
  input  logic znanE,      // Z is NaN
  input  logic prodof,     // X*Y overflows exponent
  output logic prodinfE,   // X*Y larger than max possible
  output logic nanE        // Some source is NaN
);
// Early flag terms: NaN detection across the three operands, and product
// overflow qualified by neither multiplicand being NaN.

  // The product counts as infinite only when neither factor is NaN
  assign prodinfE = prodof & ~(xnanE | ynanE);

  // Any NaN operand makes the result NaN
  assign nanE = |{xnanE, ynanE, znanE};

endmodule
|
@ -1,80 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: flag.v
|
||||
// Author: David Harris
|
||||
// Date: 12/6/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block generates the flags: invalid, overflow, underflow, inexact.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// flag2: memory-stage exception flag generation for the FMA.
// FmaFlagsM = {invalid, divide-by-zero, overflow, underflow, inexact}.
module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof, sumuf,
        xzeroM, yzeroM, zzeroM, vbits, killprodM,
        inf, nanM, FmaFlagsM,sticky,prodinfM);

    input logic         xnanM;      // X is NaN
    input logic         ynanM;      // Y is NaN
    input logic         znanM;      // Z is NaN
    input logic         xsign;      // Sign of X
    input logic         ysign;      // Sign of Y
    input logic         zsign;      // Sign of Z
    input logic         sticky;     // Sticky bit (ORed into the inexact flag)
    input logic         prodinfM;   // Product treated as infinite (name-based; driven outside this module)
    input logic         xinfM;      // X is Inf
    input logic         yinfM;      // Y is Inf
    input logic         zinfM;      // Z is Inf
    input logic         sumof;      // X*Y + Z overflows exponent
    input logic         sumuf;      // X*Y + Z underflows exponent
    input logic         xzeroM;     // x = 0
    input logic         yzeroM;     // y = 0
    input logic         zzeroM;     // z = 0
    input logic         killprodM;  // Product-killed indication (driven outside this module)
    input logic [1:0]   vbits;      // R and S bits of result
    output logic        inf;        // Some source is Inf, or the sum overflowed to Inf
    input logic         nanM;       // Some source is NaN
    output logic [4:0]  FmaFlagsM;  // {invalid, div-by-zero, overflow, underflow, inexact}

    // Internal nodes
    logic               suminf;     // sum overflowed and no operand is a NaN
    logic               inputinf;   // at least one operand is an infinity

    assign inputinf = xinfM | yinfM | zinfM;

    // The sum is infinite if it overflowed and no NaN is involved
    assign suminf = sumof && ~xnanM && ~ynanM && ~znanM;

    // Infinity indication exported to the rest of the datapath.
    // (inf - inf and 0 * inf don't propagate inf, but that is ok because
    // the illegal operation takes higher precedence.)
    assign inf = inputinf || suminf;

    // Set the overflow flag for the following cases:
    //   1) Rounded multiply result would be out of bounds
    //   2) Rounded add result would be out of bounds
    // An exact infinity coming from an infinite operand is not an overflow.
    // This must be gated with inputinf rather than inf: inf already contains
    // suminf, so "suminf && ~inf" could never be true.
    assign FmaFlagsM[2] = suminf && ~inputinf;

    // Set the underflow flag for the following cases:
    //   1) the sum underflowed and no infinity / NaN is involved
    //   2) the product was killed, Z is zero and neither multiplicand is zero
    assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));

    // Set the inexact flag for the following cases:
    //   1) Multiplication inexact
    //   2) Addition inexact
    //   3) The result overflowed
    // One of the first two cases occurred if the R or S bit is set.
    // Gated with inputinf (not inf) so that an overflow still raises inexact.
    assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) && ~(inputinf || nanM);

    // Set invalid flag for following cases:
    //   1) Inf - Inf
    //   2) 0 * Inf
    assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
                          xzeroM && yinfM || yzeroM && xinfM;

    assign FmaFlagsM[3] = 0; // divide by zero flag is never set by this block

endmodule
|
@ -1,132 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: fmac.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This is the top level block of a floating-point multiply/accumulate
|
||||
// unit(FMAC). It instantiates the following sub-blocks:
|
||||
//
|
||||
// array Booth encoding, partial product generation, product summation
|
||||
// expgen Exponent summation, compare, and adjust
|
||||
// align Alignment shifter
|
||||
// add Carry-save adder for accumulate, carry propagate adder
|
||||
// lza Leading zero anticipator to control normalization shifter
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// fma: top level of the floating-point multiply/accumulate unit.
// Computes FmaResultM = ReadData1E * ReadData2E + ReadData3E using the
// sub-blocks instantiated below. Most sub-block ports are connected by name
// through ".*", so the identifiers and widths of the internal nodes declared
// here must match the sub-block port names.
module fma(
    input  wire [63:0] ReadData1E,  // input 1
    input  wire [63:0] ReadData2E,  // input 2
    input  wire [63:0] ReadData3E,  // input 3
    input  wire [2:0]  FrmE,        // Rounding mode
    output wire [63:0] FmaResultM,  // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
    output wire [4:0]  FmaFlagsM,   // status flags
    output wire [12:0] aligncnt);   // shift count for alignment

    // ---- Datapath nodes ----
    logic [105:0] r;            // one result of partial product sum
    logic [105:0] s;            // other result of partial products
    logic [163:0] t;            // output of alignment shifter
    logic [163:0] sum;          // output of carry prop adder
    logic [53:0]  v;            // normalized sum, R, S bits
    logic [8:0]   normcnt;      // shift count for normalizer
    logic [12:0]  ae;           // multiplier exponent
    logic [8:0]   sumshift;
    logic [12:0]  de0;

    // ---- Control / status nodes ----
    logic bs;                   // sticky bit of addend
    logic ps;                   // sticky bit of product
    logic killprod;             // ReadData3E >> product
    logic negsum;               // negate sum
    logic invz;                 // invert addend
    logic selsum1;              // select +1 mode of sum
    logic negsum0;              // sum +0 < 0
    logic negsum1;              // sum +1 < 0
    logic sumzero;              // sum = 0
    logic infinity;             // generate infinity on overflow
    logic prodof;               // ReadData1E*ReadData2E out of range
    logic sumof;                // result out of range
    logic sumuf;
    logic sumshiftzero;
    logic sticky;
    logic psign;
    logic expplus1;
    logic specialsel;
    logic zexpsel;
    logic denorm0;
    logic resultdenorm;
    logic proddenorm;

    // ---- Operand classification ----
    logic xzero, yzero, zzero;
    logic xdenorm, ydenorm, zdenorm;
    logic xinf, yinf, zinf, inf;
    logic xnan, ynan, znan, nan;

    logic isAdd;
    assign isAdd = 1'b1;

    // Fraction datapath
    multiply    multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
    align       align(.zman(ReadData3E[51:0]), .*);
    add         add(.*);
    lza         lza(.*);
    normalize   normalize(.zexp(ReadData3E[62:52]), .*);
    round       round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .zman(ReadData3E[51:0]),
                      .wman(FmaResultM[51:0]), .wsign(FmaResultM[63]), .*);

    // Exponent datapath
    expgen      expgen(.xexp(ReadData1E[62:52]), .yexp(ReadData2E[62:52]), .zexp(ReadData3E[62:52]),
                       .wexp(FmaResultM[62:52]), .*);

    // Special case detection across datapath & exponent path
    special     special(.*);

    // Control logic
    sign        sign(.xsign(ReadData1E[63]), .ysign(ReadData2E[63]), .zsign(ReadData3E[63]),
                     .wsign(FmaResultM[63]), .*);
    flag        flag(.zsign(ReadData3E[63]), .vbits(v[1:0]), .*);

endmodule
|
||||
|
@ -1,165 +0,0 @@
|
||||
// fma1: first (execute) stage of the two-stage FMA.
// Unpacks the operands, classifies them (zero / Inf / NaN / denormal),
// forms the product mantissa and exponent, and aligns the addend Z to the
// product so that the next stage can add them.
module fma1(

    input logic [63:0]      FInput1E,       // X
    input logic [63:0]      FInput2E,       // Y
    input logic [63:0]      FInput3E,       // Z
    input logic [2:0]       FOpCtrlE,       // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input logic             FmtE,           // precision 1 = double 0 = single
    output logic [105:0]    ProdManE,       // 1.X frac * 1.Y frac
    output logic [161:0]    AlignedAddendE, // Z aligned for addition
    output logic [12:0]     ProdExpE,       // X exponent + Y exponent - bias
    output logic            AddendStickyE,  // sticky bit that is calculated during alignment
    output logic            KillProdE,      // set the product to zero before addition if the product is too small to matter
    output logic            XZeroE, YZeroE, ZZeroE, // inputs are zero
    output logic            XInfE, YInfE, ZInfE,    // inputs are infinity
    output logic            XNaNE, YNaNE, ZNaNE);   // inputs are NaN

    logic [51:0]    XFrac,YFrac,ZFrac;      // input fraction
    logic [52:0]    XMan,YMan,ZMan;         // input mantissa (with leading one)
    logic [12:0]    XExp,YExp,ZExp;         // input exponents
    logic [12:0]    AlignCnt;               // how far to shift the addend to align with the product
    logic [211:0]   Shift;                  // output of the alignment shifter including sticky bits
    logic           XDenormE, YDenormE, ZDenormE;   // inputs are denormal
    logic [63:0]    FInput3E2;              // value to add (Z or zero)
    logic [12:0]    Bias;                   // 1023 for double, 127 for single
    logic           XExpZero, YExpZero, ZExpZero;       // input exponent zero
    logic           XFracZero, YFracZero, ZFracZero;    // input fraction zero
    logic           XExpMax, YExpMax, ZExpMax;          // input exponent all 1s

    // Set addend to zero if FMUL instruction
    assign FInput3E2 = FOpCtrlE[2] ? 64'b0 : FInput3E;

    // Split inputs into fraction and exponent and handle single or double precision
    //  - single precision is in the top half of the inputs
    assign XExp = FmtE ? {2'b0, FInput1E[62:52]}  : {5'b0, FInput1E[62:55]};
    assign YExp = FmtE ? {2'b0, FInput2E[62:52]}  : {5'b0, FInput2E[62:55]};
    assign ZExp = FmtE ? {2'b0, FInput3E2[62:52]} : {5'b0, FInput3E2[62:55]};

    assign XFrac = FmtE ? FInput1E[51:0]  : {FInput1E[54:32], 29'b0};
    assign YFrac = FmtE ? FInput2E[51:0]  : {FInput2E[54:32], 29'b0};
    assign ZFrac = FmtE ? FInput3E2[51:0] : {FInput3E2[54:32], 29'b0};

    // The leading one is present unless the exponent field is zero (zero or denormal)
    assign XMan = {~XExpZero, XFrac};
    assign YMan = {~YExpZero, YFrac};
    assign ZMan = {~ZExpZero, ZFrac};

    assign Bias = FmtE ? 13'h3ff : 13'h7f;

    // Determine if an input is a special value
    assign XExpZero = ~|XExp;
    assign YExpZero = ~|YExp;
    assign ZExpZero = ~|ZExp;

    assign XFracZero = ~|XFrac;
    assign YFracZero = ~|YFrac;
    assign ZFracZero = ~|ZFrac;

    assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
    assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];

    assign XNaNE = XExpMax & ~XFracZero;
    assign YNaNE = YExpMax & ~YFracZero;
    assign ZNaNE = ZExpMax & ~ZFracZero;

    assign XDenormE = XExpZero & ~XFracZero;
    assign YDenormE = YExpZero & ~YFracZero;
    assign ZDenormE = ZExpZero & ~ZFracZero;

    assign XInfE = XExpMax & XFracZero;
    assign YInfE = YExpMax & YFracZero;
    assign ZInfE = ZExpMax & ZFracZero;

    assign XZeroE = XExpZero & XFracZero;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;

    // Calculate the product's exponent
    //  - When multiplying two fp numbers, add the exponents
    //  - Subtract the bias (XExp + YExp has two biases, one from each exponent)
    //  - Denormal numbers have an exponent value of 1, however they are
    //    represented with an exponent of 0. Add one if there is a denormal number
    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
                      XExp + YExp - Bias + XDenormE + YDenormE;

    // Calculate the product's mantissa
    //  - The mantissas already include the assumed one (absent for denormals and zero)
    assign ProdManE = XMan * YMan;

    // Determine the shift count for alignment
    //  - negative means Z is larger, so shift Z left
    //  - positive means the product is larger, so shift Z right
    //  - Denormal numbers have an exponent value of 1, however they are
    //    represented with an exponent of 0, so Z's effective exponent is ZExp + ZDenormE
    assign AlignCnt = ProdExpE - ZExp - ZDenormE;

    // Alignment shifter
    //
    // Default addition without shifting:
    //          | 55'b0 |  106'b(product)  | 2'b0 |
    //                  |1'b0| addend |
    // The 1'b0 before the addend is because the product's mantissa has two
    // bits before the binary point (xx.xxxxxxxxxx...)
    always_comb
        begin

        // Default values. Shift is given a default as well so that it is
        // assigned on every path through the block (no latch is inferred).
        Shift = '0;
        AddendStickyE = 0;
        KillProdE = 0;

        // If the product is too small to affect the sum, kill the product
        //          | 55'b0 |  106'b(product)  | 2'b0 |
        //  | addend |
        if ($signed(AlignCnt) <= $signed(-13'd56)) begin
            KillProdE = 1;
            AlignedAddendE = {107'b0, ZMan,2'b0};
            AddendStickyE = ~(XZeroE|YZeroE);

        // If the addend is shifted left (negative AlignCnt)
        //          | 55'b0 |  106'b(product)  | 2'b0 |
        //      | addend |
        end else if($signed(AlignCnt) <= $signed(13'd0))  begin
            Shift = {55'b0, ZMan, 104'b0} << -AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE = |(Shift[49:0]);

        // If the addend is shifted right (positive AlignCnt)
        //          | 55'b0 |  106'b(product)  | 2'b0 |
        //                          | addend |
        end else if ($signed(AlignCnt)<=$signed(13'd105))  begin
            Shift = {55'b0, ZMan, 104'b0} >> AlignCnt;
            AlignedAddendE = Shift[211:50];
            AddendStickyE = |(Shift[49:0]);

        // If the addend is too small to affect the addition
        //  - The addend has to shift two past the end of the product to be considered too small
        //  - The 2 extra bits are needed for rounding
        //          | 55'b0 |  106'b(product)  | 2'b0 |
        //                                                  | addend |
        end else begin
            AlignedAddendE = 162'b0;
            AddendStickyE = ~ZZeroE;
        end
    end

endmodule
|
||||
|
@ -1,282 +0,0 @@
|
||||
// fma2: second (memory) stage of the two-stage FMA.
// Adds the aligned addend to the product, normalizes and rounds the sum,
// selects the final result (including NaN / Inf / overflow / underflow
// special cases) and produces the exception flags
// FmaFlagsM = {invalid, divide-by-zero, overflow, underflow, inexact}.
module fma2(

    input logic [63:0]      FInput1M,       // X
    input logic [63:0]      FInput2M,       // Y
    input logic [63:0]      FInput3M,       // Z
    input logic [2:0]       FrmM,           // rounding mode
    input logic [105:0]     ProdManM,       // product mantissa
    input logic [161:0]     AlignedAddendM, // Z aligned for addition
    input logic [12:0]      ProdExpM,       // product exponent
    input logic             FmtM,           // precision 1 = double 0 = single
    input logic             AddendStickyM,  // sticky bit from the alignment shifter
    input logic             KillProdM,      // product is too small to matter
    input logic [2:0]       FOpCtrlM,       // bit 2: fmul, bit 1: negate product, bit 0: negate Z
    input logic             XZeroM, YZeroM, ZZeroM, // inputs are zero
    input logic             XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    output logic [63:0]     FmaResultM,     // rounded result (single precision in the top half)
    output logic [4:0]      FmaFlagsM);     // {invalid, div-by-zero, overflow, underflow, inexact}

    logic [51:0]    WMan;                   // rounded result fraction
    logic [10:0]    ZExp, WExp;             // addend exponent, result exponent
    logic           XSgn, YSgn, ZSgn, WSgn, PSgn;
    logic [105:0]   ProdMan2;               // product mantissa after the kill mux
    logic [162:0]   AlignedAddend2;         // addend, inverted when subtracting
    logic [161:0]   Sum;                    // magnitude of the sum
    logic [162:0]   SumTmp;                 // raw sum, bit 162 is the sign
    logic [12:0]    SumExp;
    logic [12:0]    SumExpTmp, SumExpTmpMinus1, WExpTmp;
    logic [53:0]    NormSum;
    logic [161:0]   NormSumTmp;
    logic [8:0]     NormCnt;                // normalization shift count
    logic [8:0]     LeadZeros;              // number of leading zeros in Sum
    logic           LeadFound;              // leading one has been seen
    logic [8:0]     DenormShift;
    logic [12:0]    ManLen;                 // fraction length of the selected precision
    logic [12:0]    MaxExp;                 // all-ones exponent of the selected precision
    logic           NormSumSticky, Sticky;
    logic           SumZero, NegSum, InvZ, ResultDenorm;
    logic           Gaurd, Round;
    logic           Plus1, Minus1, Plus1Tmp, Minus1Tmp;
    logic           Invalid, Underflow, Overflow, Inexact;
    logic           SigNaN;                 // some NaN input has its quiet bit clear
    logic           ProdInf, ProdOf, ProdUf;
    logic           SubBySmallNum;
    logic [63:0]    FInput3M2;              // value to add (Z or zero)
    logic           ZeroSgn, ResultSgn;

    // Set addend to zero if FMUL instruction
    assign FInput3M2 = FOpCtrlM[2] ? 64'b0 : FInput3M;

    // Split out the sign bits and the addend exponent
    assign XSgn = FInput1M[63];
    assign YSgn = FInput2M[63];
    assign ZSgn = FInput3M2[63]^FOpCtrlM[0]; // Negate Z if subtraction

    assign ZExp = FmtM ? FInput3M2[62:52] : {3'b0, FInput3M2[62:55]};

    // Calculate the product's sign
    //      Negate product's sign if FNMADD or FNMSUB
    assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];

    // ---------------------------------------------------------------------
    // Addition
    // ---------------------------------------------------------------------

    // Negate Z when doing one of the following operations:
    //      -prod +  Z
    //       prod -  Z
    assign InvZ = ZSgn ^ PSgn;

    // Choose an inverted or non-inverted addend - the one is added later
    assign AlignedAddend2 = InvZ ? ~{1'b0,AlignedAddendM} : {1'b0,AlignedAddendM};
    // Kill the product if the product is too small to affect the addition (determined in fma1)
    assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;

    // Do the addition
    //      - add one to negate if the addend was inverted
    //      - the 2 extra bits at the beginning and end are needed for rounding
    assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2,2'b0} + {162'b0, InvZ};

    // Is the sum negative
    assign NegSum = SumTmp[162];
    // If the sum is negative, negate the sum.
    assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];

    // ---------------------------------------------------------------------
    // Leading one detector
    // ---------------------------------------------------------------------
    // Counts zeros from bit 161 downwards until the first one (162 when Sum
    // is zero). Written as a bounded loop so that it never indexes outside Sum.
    always_comb begin
        LeadZeros = 9'd0;
        LeadFound = 1'b0;
        for (int k = 161; k >= 0; k--) begin
            if (~LeadFound & ~Sum[k]) LeadZeros = LeadZeros + 9'd1;
            else                      LeadFound = 1'b1;
        end
        NormCnt = LeadZeros + 9'd1;     // compute shift count
    end

    // ---------------------------------------------------------------------
    // Normalization
    // ---------------------------------------------------------------------

    // Determine if the sum is zero
    assign SumZero = ~(|Sum);

    assign ManLen = FmtM ? 13'd52 : 13'd23;
    // Determine if the result is denormal
    assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-ManLen));

    // Determine the shift needed for denormal results
    assign SumExpTmpMinus1 = SumExpTmp-1;
    assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;

    // Normalize the sum
    assign NormSumTmp = SumZero ? 162'b0 : Sum << (NormCnt+DenormShift);
    assign NormSum = NormSumTmp[161:108];
    // Calculate the sticky bit (everything below the round bit of the selected precision)
    assign NormSumSticky = FmtM ? (|NormSumTmp[107:0]) : (|NormSumTmp[136:0]);
    assign Sticky = AddendStickyM | NormSumSticky;

    // Determine sum's exponent
    assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
    assign SumExp = SumZero ? 13'b0 :
                    ResultDenorm ? 13'b0 :
                    SumExpTmp;

    // ---------------------------------------------------------------------
    // Rounding
    // ---------------------------------------------------------------------
    // round to nearest even
    //      {Gaurd, Round, Sticky}
    //      0xx - do nothing
    //      100 - tie - Plus1 if NormSum[2] = 1
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      101/110/111 - Plus1
    //
    // round to zero - do nothing
    //          - subtract 1 if a small number was supposed to be subtracted from the positive result
    //
    // round to -infinity - Plus1 if negative
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //          - subtract 1 if a small number was supposed to be subtracted from the positive result
    //
    // round to infinity - Plus1 if positive
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //          - subtract 1 if a small number was supposed to be subtracted from the negative result
    //
    // round to nearest max magnitude
    //      {Gaurd, Round, Sticky}
    //      0xx - do nothing
    //      100 - tie - Plus1
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      101/110/111 - Plus1

    assign Gaurd = FmtM ? NormSum[1] : NormSum[30];
    assign Round = FmtM ? NormSum[0] : NormSum[29];
    // Determine if the result was supposed to be subtracted by a small number
    assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;

    always_comb begin
        // Determine if you add 1
        case (FrmM)
            3'b000: Plus1Tmp = Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&NormSum[2]));//round to nearest even
            3'b001: Plus1Tmp = 0;//round to zero
            3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum);//round down
            3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum);//round up
            3'b100: Plus1Tmp = (Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky)));//round to nearest max magnitude
            default: Plus1Tmp = 1'bx;
        endcase
        // Determine if you subtract 1
        case (FrmM)
            3'b000: Minus1Tmp = 0;//round to nearest even
            3'b001: Minus1Tmp = SubBySmallNum;//round to zero
            3'b010: Minus1Tmp = ~WSgn & SubBySmallNum;//round down
            3'b011: Minus1Tmp = WSgn & SubBySmallNum;//round up
            3'b100: Minus1Tmp = 0;//round to nearest max magnitude
            default: Minus1Tmp = 1'bx;
        endcase
    end

    // If an answer is exact don't round
    assign Plus1 = Sticky | (Gaurd|Round) ? Plus1Tmp : 1'b0;
    assign Minus1 = Sticky | (Gaurd|Round) ? Minus1Tmp : 1'b0;

    // Compute rounded result.
    // Both alternatives are 65 bits wide so that WExpTmp always holds the full
    // 13-bit exponent (including a carry out of the fraction) and WMan holds
    // the fraction left-justified: double uses WMan[51:0], single WMan[51:29].
    assign {WExpTmp, WMan} = FmtM ? {SumExp, NormSum[53:2]} - {64'b0, Minus1} + {64'b0, Plus1}
                                  : {{SumExp, NormSum[53:31]} - {35'b0, Minus1} + {35'b0, Plus1}, 29'b0};
    assign WExp = WExpTmp[10:0];

    // ---------------------------------------------------------------------
    // Sign calculation
    // ---------------------------------------------------------------------

    // Determine the sign if the sum is zero
    //      if product underflows then use psign
    //      otherwise
    //          if cancellation then 0 unless round to -inf
    //          otherwise psign
    assign ZeroSgn = Underflow & ~ResultDenorm ? PSgn :
                     (PSgn^ZSgn ? FrmM == 3'b010 : PSgn);

    // Is the result negative
    //      if p - z is the Sum negative
    //      if -p + z is the Sum positive
    //      if -p - z then the Sum is negative
    assign ResultSgn = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
    assign WSgn = SumZero ? ZeroSgn : ResultSgn;

    // ---------------------------------------------------------------------
    // Select the result
    // ---------------------------------------------------------------------
    // Single-precision special values use an all-ones 8-bit exponent (8'hff).
    assign FmaResultM = XNaNM ? (FmtM ? {XSgn, FInput1M[62:52], 1'b1,FInput1M[50:0]} : {XSgn, FInput1M[62:55], 1'b1,FInput1M[53:0]}) :
                        YNaNM ? (FmtM ? {YSgn, FInput2M[62:52], 1'b1,FInput2M[50:0]} : {YSgn, FInput2M[62:55], 1'b1,FInput2M[53:0]}) :
                        ZNaNM ? (FmtM ? {ZSgn, FInput3M2[62:52], 1'b1,FInput3M2[50:0]} : {ZSgn, FInput3M2[62:55], 1'b1,FInput3M2[53:0]}) :
                        Invalid ? (FmtM ? {WSgn, 11'h7ff, 1'b1, 51'b0} : {WSgn, 8'hff, 1'b1, 54'b0}) : // has to be before inf
                        XInfM ? {PSgn, FInput1M[62:0]} :
                        YInfM ? {PSgn, FInput2M[62:0]} :
                        ZInfM ? {ZSgn, FInput3M2[62:0]} :
                        Overflow ? (FmtM ? {WSgn, 11'h7ff, 52'b0} : {WSgn, 8'hff, 55'b0}) :
                        Underflow & ~ResultDenorm ? (FmtM ? {WSgn, 63'b0} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {{WSgn, 31'b0} - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : //***do you need minus1?
                        KillProdM ? (FmtM ? FInput3M2 - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {FInput3M2[63:32] - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : // has to be after Underflow
                        FmtM ? {WSgn,WExp,WMan} : {WSgn,WExp[7:0],WMan[51:29],32'b0};

    // ---------------------------------------------------------------------
    // Flags
    // ---------------------------------------------------------------------

    // Set Invalid flag for following cases:
    //   1) Inf - Inf (an infinite product and an infinite Z of opposite effective
    //      sign; PSgn and ZSgn already include the negations selected by FOpCtrlM)
    //   2) 0 * Inf
    //   3) any input is a signaling NaN
    assign MaxExp = FmtM ? 13'd2047 : 13'd255;
    assign ProdOf = (ProdExpM >= MaxExp && ~ProdExpM[12]);
    assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
    assign SigNaN = FmtM ? (XNaNM&~FInput1M[51]) | (YNaNM&~FInput2M[51]) | (ZNaNM&~FInput3M2[51]) : (XNaNM&~FInput1M[54]) | (YNaNM&~FInput2M[54]) | (ZNaNM&~FInput3M2[54]);
    assign Invalid = SigNaN | ((XInfM | YInfM | ProdInf) & ZInfM & (PSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);

    // Set Overflow flag if the number is too big to be represented
    assign Overflow = WExpTmp >= MaxExp & ~WExpTmp[12];

    // Set Underflow flag if the number is too small to be represented in normal numbers
    assign ProdUf = KillProdM & ZZeroM;
    assign Underflow = SumExp[12] | ProdUf;

    // Set Inexact flag if the result is different from what would be output given infinite precision
    assign Inexact = (Sticky|Overflow| (Gaurd|Round))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);

    // Combine flags
    //      - FMA can't set the Divide by zero flag
    //      - Don't set the underflow flag if the result is exact
    assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow & Inexact, Inexact};

endmodule
|
||||
|
@ -1,40 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: lop.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements a Leading One Predictor used to determine
|
||||
// the normalization shift count.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// lza: behavioral leading-zero counter used to derive the normalization
// shift count. normcnt is the number of zero bits above the most significant
// one of sum (164 when sum is all zeros); sumzero flags an all-zero sum.
module lza(
    input  logic [163:0] sum,       // sum
    output logic [8:0]   normcnt,   // normalization shift count
    output logic         sumzero);  // sum = 0

    logic [8:0] zeros;      // running count of leading zeros
    logic       stop;       // set once a non-zero bit has been reached

    // Per the original author's note, a real implementation would use a
    // parallel-prefix predictor working on the adder inputs; this model
    // simply scans the result from the most significant bit down.
    always_comb begin
        zeros = 9'd0;
        stop  = 1'b0;
        for (int bitpos = 163; bitpos >= 0; bitpos--) begin
            if (~stop & ~sum[bitpos]) zeros = zeros + 9'd1;
            else                      stop  = 1'b1;
        end
        normcnt = zeros;
    end

    // Also check if sum is zero
    assign sumzero = ~|sum;

endmodule
|
||||
|
@ -1,136 +0,0 @@
|
||||
|
||||
// multiply: mantissa multiplier for the FMA.
// Extends the two 52-bit fractions with their leading bit, produces 27
// partial products through the booth sub-block, and compresses them with a
// tree of add3comp2 / add4comp2 blocks into two 106-bit outputs rE and sE.
// NOTE(review): the commented-out reference code in the original suggests
// rE + sE is meant to equal the product of the two 53-bit mantissas - confirm
// against booth/add3comp2/add4comp2, which are defined outside this block.
module multiply(
    input  logic [51:0]  xman,      // Fraction of multiplicand x
    input  logic [51:0]  yman,      // Fraction of multiplicand y
    input  logic         xdenormE,  // is x denormalized
    input  logic         ydenormE,  // is y denormalized
    input  logic         xzeroE,    // is x zero
    input  logic         yzeroE,    // is y zero
    output logic [105:0] rE,        // partial product 1
    output logic [105:0] sE);       // partial product 2

    wire [54:0]             yExt;       // y with leading bit, a 0 above it and a 0 appended below
    wire [53:0]             xExt;       // x with leading bit and a 0 above it
    wire [26:0][1:0]        add1;       // per-row 2-bit output of booth, placed below the next row
    wire [26:0][54:0]       pp;         // per-row partial product from booth
    wire [26:0]             e;          // per-row e output of booth
    wire [26:0][106:0]      acc;        // partial-product rows positioned for summation
    logic [106:0]           tmpsE;
    logic [17:0][106:0]     lv1add;
    logic [11:0][106:0]     lv2add;
    logic [7:0][106:0]      lv3add;
    logic [3:0][106:0]      lv4add;
    logic [21:0][107:0]     carryTmp;
    genvar i;

    // The leading bit is 1 unless the operand is denormal or zero
    assign xExt = {1'b0, ~(xdenormE|xzeroE), xman};
    assign yExt = {1'b0, ~(ydenormE|yzeroE), yman, 1'b0};

    // One booth block per overlapping 3-bit group of yExt
    generate
        for (i = 0; i < 27; i = i+1) begin
            booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
        end
    endgenerate

    // Position every row in the 107-bit accumulation window.
    // Row 0 carries {~e, e, e} above its partial product; rows 1..24 carry a
    // constant 1 and ~e above the partial product and the previous row's add1
    // below it, moved up two more bit positions per row. Row 25 has no
    // constant 1 and row 26 has neither the constant nor ~e.
    assign acc[0] = {49'b0, ~e[0], e[0], e[0], pp[0]};
    generate
        for (i = 1; i < 25; i = i+1) begin
            assign acc[i] = {48'b0, 1'b1, ~e[i], pp[i], add1[i-1]} << (2*i-2);
        end
    endgenerate
    assign acc[25] = {49'b0, ~e[25], pp[25], add1[24]} << 48;
    assign acc[26] = {50'b0, pp[26], add1[25]} << 50;

    //*** resize adders
    // Level 1: 27 rows -> 18 (each compressor yields a sum row and a carry row shifted left by one)
    generate
        for (i = 0; i < 9; i = i+1) begin
            add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
                                         .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
            assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
        end
    endgenerate

    // Level 2: 18 rows -> 12
    generate
        for (i = 0; i < 6; i = i+1) begin
            add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
                                         .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
            assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
        end
    endgenerate

    // Level 3: 12 rows -> 8
    generate
        for (i = 0; i < 4; i = i+1) begin
            add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
                                         .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
            assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
        end
    endgenerate

    // Level 4: 8 rows -> 4
    generate
        for (i = 0; i < 2; i = i+1) begin
            add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
                                         .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
            assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
        end
    endgenerate

    // Level 5: 4 rows -> the final sum / carry pair
    add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]),
                                 .carry(carryTmp[21]), .sum(tmpsE));

    assign sE = tmpsE[105:0];
    assign rE = {carryTmp[21][104:0], 1'b0};

endmodule
|
||||
|
@ -1,147 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: normalize.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block performs the normalization shift. It also
|
||||
// generates the R and S bits for rounding. Finally, it
|
||||
// handles the special case of a zero sum.
|
||||
//
|
||||
// v[53:2] is the fraction component of the prerounded result.
|
||||
// It can be bypassed back to the X or Z inputs of the FMAC
|
||||
// for back-to-back operations.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////
// normalize
//
// Performs the FMA normalization shift on `sum` and produces:
//   v            - 54 bits taken from the shifted sum; v[1] and v[0] are the
//                  R and S bits used by the rounder
//   sticky       - OR of every shifted-sum bit below v, ORed with bsM
//   de0          - 13-bit exponent associated with the selected window
//   resultdenorm - set when de0 is zero or negative on the normal paths
//                  (see the individual branches for the exact condition)
//
// Notation used in the comments below (taken from the original source):
//   d       = aligncntM
//   l       = normcnt
//   ea + eb = aeM
//   p       = 53
/////////////////////////////////////////////////////////////////////////////
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
                 xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);

  input  logic [163:0] sum;            // sum to be normalized
  input  logic [62:52] zexp;           // exponent field of Z
  input  logic [8:0]   normcnt;        // normalization shift count (l)
  input  logic [12:0]  aeM;            // product exponent (ea + eb)
  input  logic [12:0]  aligncntM;      // alignment shift count (d), compared as signed
  input  logic [8:0]   sumshiftM;      // shift amount for the denormal / addend-anchored paths
                                       // NOTE(review): exact definition lives in the producer - confirm
  input  logic         sumshiftzeroM;  // selects the unshifted sum on the addend-anchored path
  input  logic         sumzero;        // sum is zero
  input  logic         bsM;            // sticky bit for addend
  input  logic         xdenormM;       // input X is denormalized
  input  logic         ydenormM;       // input Y is denormalized
  input  logic         zdenormM;       // input Z is denormalized
  input  logic         xzeroM;         // input X is zero
  input  logic         yzeroM;         // input Y is zero
  input  logic         zzeroM;         // input Z is zero
  output logic         sticky;         // sticky bit
  output logic [12:0]  de0;            // result exponent
  output logic         resultdenorm;   // result is denormalized
  output logic [53:0]  v;              // normalized sum, R, S bits

  // Internal nodes
  logic [163:0] sumshifted;            // shifted sum
  logic [9:0]   sumshifttmp;           // sumshiftM - 2; bit 9 flags a negative result
  logic         isShiftLeft1;
  logic         tmp1, tmp2, tmp3, tmp4, tmp5; // debug copies of the branch conditions below

  // When the sum is zero, normalization does not apply and only the
  // sticky bit must be computed. Otherwise, the sum is shifted
  // and the R and S bits (v[1] and v[0], respectively) are assigned.
  //
  // The sticky bit calculation is actually built into the shifter and
  // does not require a true subtraction shown in the model.

  // One extra bit of left shift on the denormal-result path when the
  // alignment count is -1, 0 or +1 and the Z exponent field equals 2.
  assign isShiftLeft1 = (aligncntM == 13'b1 || aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1))) && zexp == 11'h2;

  always_comb
    begin
      // Defaults for signals that only the addend-anchored branch drives.
      // Without them always_comb would infer latches on the other branch.
      sumshifttmp = 10'b0;
      tmp1 = 1'b0;
      tmp2 = 1'b0;
      tmp3 = 1'b0;
      tmp4 = 1'b0;
      tmp5 = 1'b0;

      if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
        // product anchored or cancellation
        if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
          // normal result
          de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
          resultdenorm = |sum & ~|de0 | de0[12];
          // if z is zero then there was a 56 bit shift of the product
          sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
        end else begin
          // exponent below emin: shift amount derived from aeM only,
          // result is forced denormal with a zero exponent
          sumshifted = sum << (13'd1080+aeM);
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          resultdenorm = 1;
          de0 = 0;
        end

      end else begin // extract normalized bits
        sumshifttmp = {1'b0,sumshiftM} - 2;
        // a negative (sumshiftM - 2) means no shift is applied
        sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
        tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
        tmp2 = ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]);
        tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1]));
        tmp4 = sumshifted[160];
        tmp5 = sumshifted[159];
        // for some reason use exp = zexp + {0,1,2}
        // the book says exp = zexp + {-1,0,1}
        // The chain below picks the 54-bit window according to where the
        // leading one landed and adjusts the exponent to match.
        if(sumshiftzeroM) begin
          v = sum[162:109];
          sticky = (|sum[108:0]) | bsM;
          de0 = {2'b0,zexp};
        end else if(sumshifted[163] & ~sumshifttmp[9])begin
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
          de0 = {2'b0,zexp} +13'd2;
        end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
          v = sumshifted[161:108];
          sticky = (|sumshifted[107:0]) | bsM;
          de0 = {2'b0,zexp}+13'd1;
        end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          de0 = {2'b0,zexp}+{12'b0,zdenormM};
        end else if(sumshifted[160]& ~zdenormM) begin
          de0 = {2'b0,zexp}-13'b1;
          // keep the wider window when the exponent reached zero on a non-zero sum
          v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
          sticky = (|sumshifted[105:0]) | bsM;
        end else if(sumshifted[159]& ~zdenormM) begin
          de0 = {2'b0,zexp}-13'd2;
          // exponent zero or negative on a non-zero sum: fall back to a higher window
          v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
          sticky = (|sumshifted[104:0]) | bsM;
        end else if(zdenormM) begin
          v = sumshifted[160:107];
          sticky = (|sumshifted[106:0]) | bsM;
          de0 = {{2{zexp[62]}},zexp};
        end else begin
          de0 = 0;
          sumshifted = sum << sumshiftM-1; // p+2+l
          v = sumshifted[162:109];
          sticky = (|sumshifted[108:0]) | bsM;
        end

        resultdenorm = (~|de0 | de0[12]);
      end
    end

endmodule
|
||||
|
||||
|
@ -1,124 +0,0 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: round.v
|
||||
// Author: David Harris
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block is responsible for rounding the normalized result of
// the FMAC. Because prenormalized results may be bypassed back to
// the FMAC X and Z inputs, rounding does not appear in the critical
// path of most floating point code. This is good because rounding
// requires an entire 52 bit carry-propagate half-adder delay.
|
||||
//
|
||||
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
|
||||
// muxed in to form the actual result for register file writeback. This
|
||||
// saves a mux from the writeback path.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////
// round
//
// Rounds the normalized FMA result `v` (52 result bits in v[53:2], R and S
// bits in v[1:0], plus the external `sticky`) according to FrmM, and muxes
// in the special results (NaN, infinity / largest finite, zero).
//
// FrmM encoding handled here:
//   000 round to nearest, ties to even
//   001 round toward zero
//   010 round down (toward -infinity)
//   011 round up   (toward +infinity)
//   100 round to nearest, ties to max magnitude
/////////////////////////////////////////////////////////////////////////////
module round(v, sticky, FrmM, wsign,
             FmaFlagsM, inf, nanM, xnanM, ynanM, znanM,
             xman, yman, zman,
             wman, infinity, specialsel,expplus1);

  input  logic [53:0] v;          // normalized sum, R, S bits
  input  logic        sticky;     // sticky bit
  input  logic [2:0]  FrmM;       // rounding mode
  input  logic        wsign;      // sign of result
  input  logic [4:0]  FmaFlagsM;  // exception flags: [4] invalid, [2] overflow, [1] underflow
  input  logic        inf;        // some input is infinity
  input  logic        nanM;       // some input is NaN
  input  logic        xnanM;      // X is NaN
  input  logic        ynanM;      // Y is NaN
  input  logic        znanM;      // Z is NaN
  input  logic [51:0] xman;       // fraction of X
  input  logic [51:0] yman;       // fraction of Y
  input  logic [51:0] zman;       // fraction of Z
  output logic [51:0] wman;       // rounded result of FMAC
  output logic        infinity;   // generate infinity (rather than largest finite) on overflow
  output logic        specialsel; // select special result
  output logic        expplus1;   // rounding carried out of the fraction; exponent must be incremented

  // Internal nodes
  logic        plus1;        // round by adding one
  logic        inexact;      // some discarded bit is set
  wire  [52:0] v1;           // result + 1 (for rounding), bit 52 is the carry out
  wire  [51:0] specialres;   // result of exceptional case
  wire  [51:0] infinityres;  // infinity or largest real number
  wire  [51:0] nanres;       // propagated or generated NaN

  // The bits below the result LSB are {v[1], v[0], sticky}. The result is
  // exact only when all of them are zero.
  assign inexact = v[1] | v[0] | sticky;

  // Decide whether the magnitude is incremented.
  //   nearest-even : {v[1], v[0], sticky}
  //       0xx         - do nothing
  //       100         - tie - plus1 if v[2] = 1
  //       101/110/111 - plus1
  //   toward zero  : never
  //   down / up    : only when the result is inexact and the sign matches
  //                  the rounding direction; an exact result is never
  //                  modified
  //   nearest-max-magnitude : plus1 whenever v[1] is set, so ties always
  //                  move away from zero regardless of sign
  always_comb begin
    case (FrmM)
      3'b000:  plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))); // round to nearest even
      3'b001:  plus1 = 1'b0;                                            // round to zero
      3'b010:  plus1 = wsign & inexact;                                 // round down
      3'b011:  plus1 = ~wsign & inexact;                                // round up
      3'b100:  plus1 = v[1];                                            // round to nearest max magnitude
      default: plus1 = 1'bx;
    endcase
  end

  // Compute rounded result; the extra MSB captures the carry out.
  assign v1 = {1'b0, v[53:2]} + 53'd1;

  // Determine special result in event of selection of a result from
  // another FPU functional unit, infinity, NaN, or underflow.
  // The special result mux is a 4:1 mux that should not appear in the
  // critical path of the machine. It is not priority encoded, despite
  // the code below suggesting otherwise.
  assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || // overflow underflow invalid
                      nanM || inf;
  assign specialres = FmaFlagsM[4] | nanM ? nanres :       // invalid
                      FmaFlagsM[2]        ? infinityres :  // overflow
                      inf                 ? 52'b0 :
                      FmaFlagsM[1]        ? 52'b0 : 52'bx; // underflow

  // Overflow is handled differently for different rounding modes:
  // the result is either infinity or the largest finite number.
  //   nearest modes : always infinity
  //   toward zero   : always largest finite
  //   down          : infinity only for negative results
  //   up            : infinity only for positive results
  // Reserved encodings keep the previous behaviour (infinity).
  always_comb begin
    case (FrmM)
      3'b000:  infinity = 1'b1;
      3'b001:  infinity = 1'b0;
      3'b010:  infinity = wsign;
      3'b011:  infinity = ~wsign;
      3'b100:  infinity = 1'b1;
      default: infinity = 1'b1;
    endcase
  end
  assign infinityres = infinity ? 52'b0 : {52{1'b1}};

  // Invalid operations produce a quiet NaN. An input NaN is propagated with
  // its quiet bit forced to one; X has priority over Y, Y over Z.
  // IEEE 754-2008 section 6.2.3 states:
  // "If two or more inputs are NaN, then the payload of the resulting NaN should be
  //  identical to the payload of one of the input NaNs if representable in the destination
  //  format. This standard does not specify which of the input NaNs will provide the payload."
  assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0})); // KEP 210112 add the 1 to make NaNs quiet

  // Select result.
  // expplus1 reports that the increment overflowed the fraction.
  assign expplus1 = v1[52] & ~specialsel & plus1;
  assign wman = specialsel ? specialres : (plus1 ? v1[51:0] : v[53:2]);

endmodule
|
||||
|
@ -1,111 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: sign.v
|
||||
// Author: David Harris
|
||||
// Date: 12/1/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block manages the signs of the numbers.
|
||||
// 1 = negative
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////
// sign
//
// Derives the sign of the FMA result and the adder control signals.
//   1 = negative
//
// Outputs:
//   invz    - invert the addend (Z and the product have opposite signs)
//   selsum1 - select the +1 result of the compound adder (same as invz)
//   negsum  - negate the adder result
//   wsign   - sign of the final result W
/////////////////////////////////////////////////////////////////////////////
module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
            sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);

  input  logic       xsign;      // Sign of X
  input  logic       ysign;      // Sign of Y
  input  logic       zsign;      // Sign of Z
  input  logic       isAdd;      // selects which zero-sign rule is applied below
  input  logic       negsum0;    // Sum in +0 mode is negative
  input  logic       negsum1;    // Sum in +1 mode is negative
  input  logic       bsM;        // sticky bit from addend (not used by the current logic)
  input  logic [2:0] FrmM;       // rounding mode; 3'b010 is round toward minus infinity
  input  logic [4:0] FmaFlagsM;  // exception flags; [4] and [1] are used here
  input  logic       sumzero;    // Sum = 0
  input  logic       zinfM;      // Z = Inf
  input  logic       inf;        // Some input = Inf
  output logic       wsign;      // Sign of W
  output logic       invz;       // Invert addend into adder
  output logic       negsum;     // Negate result of adder
  output logic       selsum1;    // Select +1 mode from compound adder

  // Internal nodes
  logic psign;     // sign of the product X*Y
  wire  zerosign;  // sign if result = 0
  wire  sumneg;    // sign of a non-zero, finite sum
  wire  infsign;   // sign if result = Inf

  // Compute sign of product
  assign psign = xsign ^ ysign;

  // Invert addend if sign of Z is different from sign of product
  assign invz = (zsign ^ psign);
  assign selsum1 = invz;

  // Negate the sum if the selected adder output is negative
  assign negsum = (selsum1&negsum1) | (~selsum1&negsum0);

  // Sign of the sum:
  //   opposite signs, Z negative : negative when the +1 sum is negative
  //   opposite signs, P negative : negative when the +1 sum is not negative
  //   both negative              : negative
  assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);

  // Compute sign of result
  // This involves a special case when the sum is zero:
  //   x+x retains the same sign as x even when x = +/- 0.
  //   otherwise, x-x = +0 unless in the RM mode when x-x = -0
  // There is also a special case for invalid results;
  // the sign of the NaN produced is forced to be 0.
  // IEEE 754-2008 section 6.3 states
  // "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
  // also pertaining to negative zero it states:
  // "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
  //  shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
  //  sum/difference shall be -0. However, x+x = x-(-x) retains the same sign as x even when x is zero."
  //
  // Zero sign as implemented:
  //   FmaFlagsM[1] set                : psign
  //   isAdd,  P and Z signs differ    : negative only when FrmM == 3'b010
  //   isAdd,  P and Z signs equal     : psign
  //   ~isAdd, P and Z signs differ    : psign
  //   ~isAdd, P and Z signs equal     : negative only when FrmM == 3'b010
  assign zerosign = FmaFlagsM[1] ? psign :
                    (isAdd ? ((psign^zsign) ? (FrmM == 3'b010) : psign) :
                             ((psign^zsign) ? psign : (FrmM == 3'b010)));

  // Infinite result takes the sign of Z when Z is the infinity, else the product sign
  assign infsign = zinfM ? zsign : psign; // KEP 210112 keep the correct sign when result is infinity

  // Priority: invalid (positive) > infinity > exact zero > ordinary sum
  assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));

endmodule
|
@ -1,67 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Block Name: special.v
|
||||
// Author: David Harris
|
||||
// Date: 12/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block implements special case handling for unusual operands (e.g.
|
||||
// 0, NaN, denormalize, infinity). The block consists of zero/one detectors.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////
// special
//
// Classifies the three operands. For each of ReadData1E (x), ReadData2E (y)
// and ReadData3E (z) it reports whether the value is zero, NaN, denormalized
// or infinity, looking only at bits 62:52 (exponent) and 51:0 (fraction).
// Bit 63 is ignored.
/////////////////////////////////////////////////////////////////////////////
module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
               xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);

  input  logic [63:0] ReadData1E, ReadData2E, ReadData3E; // operands x, y, z
  output logic        xzeroE, yzeroE, zzeroE;             // operand bits 62:0 are all zero
  output logic        xnanE, ynanE, znanE;                // operand is NaN
  output logic        xdenormE, ydenormE, zdenormE;       // operand is denormalized
  output logic        xinfE, yinfE, zinfE;                // operand is infinity

  // Shared detectors: one set per operand, reused by every classifier.
  logic xExpOnes, yExpOnes, zExpOnes;    // exponent field is all ones
  logic xExpAny,  yExpAny,  zExpAny;     // exponent field has at least one bit set
  logic xFracAny, yFracAny, zFracAny;    // fraction field has at least one bit set

  assign xExpOnes = &ReadData1E[62:52];
  assign yExpOnes = &ReadData2E[62:52];
  assign zExpOnes = &ReadData3E[62:52];

  assign xExpAny  = |ReadData1E[62:52];
  assign yExpAny  = |ReadData2E[62:52];
  assign zExpAny  = |ReadData3E[62:52];

  assign xFracAny = |ReadData1E[51:0];
  assign yFracAny = |ReadData2E[51:0];
  assign zFracAny = |ReadData3E[51:0];

  // NaN: exponent all ones, fraction non-zero
  assign xnanE = xExpOnes & xFracAny;
  assign ynanE = yExpOnes & yFracAny;
  assign znanE = zExpOnes & zFracAny;

  // Denormalized: exponent zero, fraction non-zero
  assign xdenormE = ~xExpAny & xFracAny;
  assign ydenormE = ~yExpAny & yFracAny;
  assign zdenormE = ~zExpAny & zFracAny;

  // Infinity: exponent all ones, fraction zero
  assign xinfE = xExpOnes & ~xFracAny;
  assign yinfE = yExpOnes & ~yFracAny;
  assign zinfE = zExpOnes & ~zFracAny;

  // Zero: exponent and fraction both zero.
  // Denormalized operands are deliberately NOT reported as zero, so that
  // computations on denormals do not collapse to a zero result.
  assign xzeroE = ~xExpAny & ~xFracAny;
  assign yzeroE = ~yExpAny & ~yFracAny;
  assign zzeroE = ~zExpAny & ~zFracAny;

endmodule
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
417
wally-pipelined/src/fpu/faddcvt.sv
Executable file
417
wally-pipelined/src/fpu/faddcvt.sv
Executable file
@ -0,0 +1,417 @@
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
|
||||
// or if (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 7: Round the result.//
|
||||
// Step 8: Put sum onto output.
|
||||
//
|
||||
|
||||
// faddcvt
//
// Two-stage wrapper around the floating-point add/convert datapath:
//   fpuaddcvt1 (E stage) -> E/M pipeline registers -> fpuaddcvt2 (M stage).
// Every value produced by fpuaddcvt1 is registered through a flopenrc that
// is cleared by FlushM and enabled while StallM is low.
module faddcvt(
  input  logic        clk,
  input  logic        reset,
  input  logic        FlushM,             // clear input of the E/M registers
  input  logic        StallM,             // E/M registers hold while this is high
  input  logic [63:0] SrcXE,              // 1st input operand (A)
  input  logic [63:0] SrcYE,              // 2nd input operand (B)
  input  logic [3:0]  FOpCtrlE, FOpCtrlM, // Function opcode
  input  logic        FmtE, FmtM,         // Result precision select
                                          // NOTE(review): fpuaddcvt1 inverts FmtE to form P - confirm polarity
  input  logic [2:0]  FrmM,               // Rounding mode
  output logic [63:0] FAddResM,           // Result of operation
  output logic [4:0]  FAddFlgM);          // IEEE exception flags

  // 64-bit datapath values (E-stage value, then its registered M-stage copy)
  logic [63:0] AddSumE,    AddSumM;
  logic [63:0] AddSumTcE,  AddSumTcM;
  logic [63:0] AddFloat1E, AddFloat1M;
  logic [63:0] AddFloat2E, AddFloat2M;

  // Exponents
  logic [11:0] AddExp1DenormE, AddExp1DenormM;
  logic [11:0] AddExp2DenormE, AddExp2DenormM;
  logic [10:0] AddExponentE,   AddExponentM;
  logic [10:0] AddExpPostSumE, AddExpPostSumM;

  // Control and status bits
  logic [3:0]  AddSelInvE,     AddSelInvM;
  logic        AddCorrSignE,   AddCorrSignM;
  logic        AddOp1NormE,    AddOp1NormM;
  logic        AddOp2NormE,    AddOp2NormM;
  logic        AddOpANormE,    AddOpANormM;
  logic        AddOpBNormE,    AddOpBNormM;
  logic        AddInvalidE,    AddInvalidM;
  logic        AddDenormInE,   AddDenormInM;
  logic        AddConvertE,    AddConvertM;
  logic        AddSwapE,       AddSwapM;
  logic        AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
  logic        AddSignAE,      AddSignAM;

  // ---- Execute stage ------------------------------------------------------
  fpuaddcvt1 fpadd1 (
    .SrcXE(SrcXE),
    .SrcYE(SrcYE),
    .FOpCtrlE(FOpCtrlE),
    .FmtE(FmtE),
    .AddFloat1E(AddFloat1E),
    .AddFloat2E(AddFloat2E),
    .AddExponentE(AddExponentE),
    .AddExpPostSumE(AddExpPostSumE),
    .AddExp1DenormE(AddExp1DenormE),
    .AddExp2DenormE(AddExp2DenormE),
    .AddSumE(AddSumE),
    .AddSumTcE(AddSumTcE),
    .AddSelInvE(AddSelInvE),
    .AddCorrSignE(AddCorrSignE),
    .AddSignAE(AddSignAE),
    .AddOp1NormE(AddOp1NormE),
    .AddOp2NormE(AddOp2NormE),
    .AddOpANormE(AddOpANormE),
    .AddOpBNormE(AddOpBNormE),
    .AddInvalidE(AddInvalidE),
    .AddDenormInE(AddDenormInE),
    .AddConvertE(AddConvertE),
    .AddSwapE(AddSwapE),
    .AddNormOvflowE(AddNormOvflowE));

  // ---- E/M pipeline registers ----------------------------------------------
  flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
  flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
  flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
  flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
  flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
  flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
  flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
  flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
  // 4-bit AddSelInv plus eleven single-bit flags = 15 bits
  flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
                           {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE,
                            AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE,
                            AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
                           {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM,
                            AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM,
                            AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});

  // ---- Memory stage -------------------------------------------------------
  fpuaddcvt2 fpadd2 (
    .FrmM(FrmM),
    .FOpCtrlM(FOpCtrlM),
    .FmtM(FmtM),
    .AddSumM(AddSumM),
    .AddSumTcM(AddSumTcM),
    .AddFloat1M(AddFloat1M),
    .AddFloat2M(AddFloat2M),
    .AddExp1DenormM(AddExp1DenormM),
    .AddExp2DenormM(AddExp2DenormM),
    .AddExponentM(AddExponentM),
    .AddExpPostSumM(AddExpPostSumM),
    .AddSelInvM(AddSelInvM),
    .AddOp1NormM(AddOp1NormM),
    .AddOp2NormM(AddOp2NormM),
    .AddOpANormM(AddOpANormM),
    .AddOpBNormM(AddOpBNormM),
    .AddInvalidM(AddInvalidM),
    .AddDenormInM(AddDenormInM),
    .AddSignAM(AddSignAM),
    .AddCorrSignM(AddCorrSignM),
    .AddConvertM(AddConvertM),
    .AddSwapM(AddSwapM),
    .FAddResM(FAddResM),
    .FAddFlgM(FAddFlgM));

endmodule
|
||||
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
|
||||
|
||||
input logic [63:0] SrcXE; // 1st input operand (A)
|
||||
input logic [63:0] SrcYE; // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE; // Function opcode
|
||||
input logic FmtE; // Result Precision (1 for double, 0 for single)
|
||||
|
||||
wire P;
|
||||
assign P = ~FmtE;
|
||||
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
wire [11:0] exp_diff1, exp_diff2;
|
||||
wire [11:0] exp_shift;
|
||||
wire [51:0] mantissaA;
|
||||
wire [56:0] mantissaA1;
|
||||
wire [63:0] mantissaA3;
|
||||
wire [51:0] mantissaB;
|
||||
wire [56:0] mantissaB1, mantissaB2;
|
||||
wire [63:0] mantissaB3;
|
||||
wire exp_gt63;
|
||||
wire Sticky_out;
|
||||
wire sub;
|
||||
wire zeroB;
|
||||
wire [5:0] align_shift;
|
||||
|
||||
output logic [63:0] AddFloat1E;
|
||||
output logic [63:0] AddFloat2E;
|
||||
output logic [10:0] AddExponentE;
|
||||
output logic [10:0] AddExpPostSumE;
|
||||
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
|
||||
output logic [63:0] AddSumE, AddSumTcE;
|
||||
output logic [3:0] AddSelInvE;
|
||||
output logic AddCorrSignE;
|
||||
output logic AddSignAE;
|
||||
output logic AddOp1NormE, AddOp2NormE;
|
||||
output logic AddOpANormE, AddOpBNormE;
|
||||
output logic AddInvalidE;
|
||||
output logic AddDenormInE;
|
||||
// output logic exp_valid;
|
||||
output logic AddConvertE;
|
||||
output logic AddSwapE;
|
||||
output logic AddNormOvflowE;
|
||||
wire [5:0] ZP_mantissaA;
|
||||
wire [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
wire ZV_mantissaB;
|
||||
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the original operands, the FOpCtrlE , and their precision P.
|
||||
// Single precision inputs are converted to double precision
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "AddSelInvE" is used in
|
||||
// the third pipeline stage to select the result. Also, AddOp1NormE
|
||||
// and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
|
||||
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
|
||||
AddFloat1E, AddFloat2E, FOpCtrlE);
|
||||
|
||||
// Perform Exponent Subtraction (used for alignment). For performance
|
||||
// both exponent subtractions are performed in parallel. This was
|
||||
// changed to a behavior level to allow the tools to try to optimize
|
||||
// the two parallel additions. The input values are zero-extended to 12
|
||||
// bits prior to performing the addition.
|
||||
|
||||
assign exp1 = {1'b0, AddFloat1E[62:52]};
|
||||
assign exp2 = {1'b0, AddFloat2E[62:52]};
|
||||
assign exp_diff1 = exp1 - exp2;
|
||||
assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
|
||||
|
||||
// The second operand (B) should be set to zero, if FOpCtrlE does not
|
||||
// specify addition or subtraction
|
||||
assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
|
||||
|
||||
// Swapped operands if zeroB is not one and exp1 < exp2.
|
||||
// Swapping causes exp2 to be used for the result exponent.
|
||||
// Only the exponent of the larger operand is used to determine
|
||||
// the final result.
|
||||
assign AddSwapE = exp_diff1[11] & ~zeroB;
|
||||
assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
|
||||
assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
|
||||
assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
|
||||
assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
|
||||
assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
// modified to 52 bits to detect leading zeroes on denormalized mantissas
|
||||
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
|
||||
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
|
||||
|
||||
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
|
||||
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
|
||||
assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
|
||||
|
||||
// Determine the alignment shift and limit it to 63. If any bit from
|
||||
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
|
||||
assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
|
||||
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
|
||||
| exp_shift[8] | exp_shift[7] | exp_shift[6];
|
||||
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
|
||||
|
||||
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
|
||||
// 001.M[51]M[50] ... M[1]M[0]00
|
||||
// Unless the number has an exponent of zero, in which case it
|
||||
// is unpacked as
|
||||
// 000.00 ... 00
|
||||
// This effectively flushes denormalized values to zero.
|
||||
// The three bits of to the left of the binary point prevent overflow
|
||||
// and loss of sign information. The two bits to the right of the
|
||||
// original mantissa form the "guard" and "round" bits that are used
|
||||
// to round the result.
|
||||
assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
|
||||
assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
|
||||
assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
|
||||
assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
|
||||
|
||||
// Perform mantissa alignment using a 57-bit barrel shifter
|
||||
// If any of the bits shifted out are one, Sticky_out is set.
|
||||
// The size of the barrel shifter could be reduced by two bits
|
||||
// by not adding the leading two zeros until after the shift.
|
||||
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
|
||||
|
||||
// Place either the sign-extended 32-bit value or the original 64-bit value
|
||||
// into IntValue (to be used for integer to floating point conversion)
|
||||
assign IntValue [31:0] = SrcXE[31:0];
|
||||
assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
|
||||
|
||||
// If doing an integer to floating point conversion, mantissaA3 is set to
|
||||
// IntValue and the prenormalized exponent is set to 1084. Otherwise,
|
||||
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
|
||||
// and the exponent value is left unchanged.
|
||||
// Under denormalized cases, the exponent before the rounder is set to 1
|
||||
// if the normal shift value is 11.
|
||||
assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];
|
||||
assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
|
||||
|
||||
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
|
||||
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
|
||||
// zeros.
|
||||
assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
|
||||
assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
|
||||
assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
|
||||
|
||||
// The sign of the result needs to be corrected if the true
|
||||
// operation is subtraction and the input operands were swapped.
|
||||
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
|
||||
|
||||
// 64-bit Mantissa Adder/Subtractor
|
||||
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
|
||||
|
||||
// 64-bit Mantissa Subtractor - to get the two's complement of the
|
||||
// result when the sign from the adder/subtractor is negative.
|
||||
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
|
||||
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
|
||||
// or of (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
// left of the binary point is 1.
|
||||
// Step 7: Round the result.//
|
||||
// Step 8: Put the sum onto the output.
|
||||
//
|
||||
|
||||
|
||||
// fpuaddcvt2: second half of the floating-point add/convert datapath (all
// signals carry the M suffix). It takes the two adder results (AddSumM and
// AddSumTcM) plus side-band information, picks the correctly signed sum,
// normalizes it (lz64 + barrel_shifter_l64) and hands it to the rounder, which
// produces the final result FAddResM and the flags FAddFlgM.
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
|
||||
|
||||
input [2:0] FrmM; // Rounding mode - specify values
|
||||
input [3:0] FOpCtrlM; // Function opcode
|
||||
input FmtM; // Result Precision (0 for double, 1 for single)
|
||||
// input AddOvEnM; // Overflow trap enabled
|
||||
// input AddUnEnM; // Underflow trap enabled
|
||||
input [63:0] AddSumM, AddSumTcM;
|
||||
input [63:0] AddFloat1M;
|
||||
input [63:0] AddFloat2M;
|
||||
input [11:0] AddExp1DenormM, AddExp2DenormM;
|
||||
input [10:0] AddExponentM, AddExpPostSumM; //exp_pre;
|
||||
//input exp_valid;
|
||||
input [3:0] AddSelInvM;
|
||||
input AddOp1NormM, AddOp2NormM;
|
||||
input AddOpANormM, AddOpBNormM;
|
||||
input AddInvalidM;
|
||||
input AddDenormInM;
|
||||
input AddSignAM;
|
||||
input AddCorrSignM;
|
||||
input AddConvertM;
|
||||
input AddSwapM;
|
||||
// input AddNormOvflowM;
|
||||
|
||||
output [63:0] FAddResM; // Result of operation
|
||||
output [4:0] FAddFlgM; // IEEE exception flags
|
||||
wire AddDenormM; // AddDenormM on input or output
|
||||
|
||||
// P is the inverse of FmtM and is what the rounder receives as its precision input
wire P;
|
||||
assign P = ~FmtM;
|
||||
|
||||
wire [10:0] exp_pre;
|
||||
wire [63:0] Result;
|
||||
wire [63:0] sum_norm, sum_norm_w_bypass;
|
||||
wire [5:0] norm_shift, norm_shift_denorm;
|
||||
wire exp_valid;
|
||||
wire DenormIO;
|
||||
wire [4:0] FlagsIn;
|
||||
// NOTE(review): Sticky_out and zeroB are declared but never driven or read in this module
wire Sticky_out;
|
||||
wire sign_corr;
|
||||
wire zeroB;
|
||||
wire [10:0] AddExpPostSumM;
|
||||
wire mantissa_comp;
|
||||
wire mantissa_comp_sum;
|
||||
wire mantissa_comp_sum_tc;
|
||||
wire Float1_sum_comp;
|
||||
wire Float2_sum_comp;
|
||||
wire Float1_sum_tc_comp;
|
||||
wire Float2_sum_tc_comp;
|
||||
wire normal_underflow;
|
||||
wire [63:0] sum_corr;
|
||||
logic AddNormOvflowM;
|
||||
|
||||
|
||||
logic AddOvEnM; // Overflow trap enabled
|
||||
logic AddUnEnM; // Underflow trap enabled
|
||||
|
||||
// Both trap enables are tied high here instead of being module inputs
assign AddOvEnM = 1'b1;
|
||||
assign AddUnEnM = 1'b1;
|
||||
// Exponent value pre-rounding with considerations for denormalized
|
||||
// cases/conversion cases
|
||||
assign exp_pre = AddDenormInM ?
|
||||
((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
|
||||
: (AddConvertM ? 11'b10000111100 : AddExponentM);
|
||||
|
||||
|
||||
// Finds normal underflow result to determine whether to round the final exponent down
|
||||
// Comparison between each float and the resulting sum of the primary cla adder/subtractor and cla subtractor
|
||||
assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
|
||||
|
||||
// Determines the correct Float value to compare based on AddSwapM result
|
||||
assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
|
||||
assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;
|
||||
|
||||
// Determines the correct comparison result based on operation and sign of resulting sum
|
||||
assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
|
||||
|
||||
// If the signs are different and both operands aren't denormalized
|
||||
// the normal underflow bit is needed and therefore updated.
// NOTE(review): the expression uses XNOR (~^), which is true when the two sign
// bits are EQUAL - confirm which of comment or code reflects the intent.
|
||||
assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
|
||||
|
||||
// Determine the correct sign of the result
|
||||
assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];
|
||||
|
||||
// If the sum is negative, use its two's complement instead.
|
||||
// This value has to be 64-bits to correctly handle the
|
||||
// case 10...00
|
||||
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
|
||||
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
|
||||
|
||||
// Finds normal underflow result to determine whether to round the final exponent down
|
||||
//KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corr is all zeros, exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
|
||||
|
||||
// Force the normalization shift to zero for the denormal/underflow cases selected below
assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift);
|
||||
|
||||
// Barrel shifter used for normalization. It takes as inputs
|
||||
// the corrected sum and the amount by which the sum should
|
||||
// be shifted. It outputs the normalized sum.
// NOTE(review): this comment used to say "right shifted", but the instance is
// barrel_shifter_l64, whose name suggests a left shift - confirm.
|
||||
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
|
||||
|
||||
// When FOpCtrlM[3] is set the shifter is bypassed: sum_corr is passed through,
// bitwise inverted if FOpCtrlM[0] is also set
assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. If the result is a single precision number, the actual
|
||||
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
|
||||
// At this point, normalization has already been performed, so we know
|
||||
// exactly where the rounding point is. The rounding unit also
|
||||
// handles special cases and sets the exception flags.
|
||||
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
|
||||
// help in processor reservation station detection of load/stores. In
|
||||
// other words, the processor would like to know ahead of time that
|
||||
// if the result is an exception then don't load or store.
|
||||
rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
|
||||
AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
|
||||
AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
|
||||
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
|
||||
|
||||
// Drive the final result and the exception flags onto the outputs
// (continuous assignments; no registers are instantiated in this module).
|
||||
assign FAddResM = Result;
|
||||
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
|
||||
|
||||
endmodule // fpuaddcvt2
|
||||
|
||||
|
@ -64,38 +64,38 @@ module fctrl (
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.s.w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.s.wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.s.l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.s.lu
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.s.w
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.s.wu
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.s.l
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.s.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_1_100_0010_00_00_0_0; // fcvt.w.s
|
||||
2'b01: ControlsD = `FCTRLW'b1_1_100_0110_00_00_0_0; // fcvt.wu.s
|
||||
2'b10: ControlsD = `FCTRLW'b1_1_100_1010_00_00_0_0; // fcvt.l.s
|
||||
2'b11: ControlsD = `FCTRLW'b1_1_100_1110_00_00_0_0; // fcvt.lu.s
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.s
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.s
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.s
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fcvt.s.d
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.d.w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.d.wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.d.l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.d.lu
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.s.d
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.d.w
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.d.wu
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.d.l
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.d.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_100_0010_00_00_0_0; // fcvt.w.d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_100_0110_00_00_0_0; // fcvt.wu.d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_100_1010_00_00_0_0; // fcvt.l.d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_100_1110_00_00_0_0; // fcvt.lu.d
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.d
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.d
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.d
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.d
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fcvt.d.s
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
@ -109,7 +109,7 @@ module fctrl (
|
||||
// Precision
|
||||
// 0-single
|
||||
// 1-double
|
||||
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : Funct7D[0];
|
||||
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
|
163
wally-pipelined/src/fpu/fcvt.sv
Normal file
163
wally-pipelined/src/fpu/fcvt.sv
Normal file
@ -0,0 +1,163 @@
|
||||
|
||||
// `include "wally-config.vh"
|
||||
// fcvt: combinational conversion between integers and floating point.
//
//   X        - floating-point operand; unpacked below into XSgn/XExp/XFrac
//   SrcAE    - integer operand
//   FOpCtrlE - {long, unsigned, to int, from int} (see the opcode table below)
//   FrmE     - rounding mode, decoded in the CalcPlus1 case statement
//   FmtE     - 1: 11-bit exponent / 52-bit fraction layout, bias 1023
//              0: 8-bit exponent / 23-bit fraction layout taken from the
//                 upper bits of X, bias 127
//   CvtResE  - floating-point result when FOpCtrlE[0] is set, otherwise the
//              integer result
//   CvtFlgE  - bit 4 = (Of | Uf) qualified by FOpCtrlE[1],
//              bit 0 = (Guard | Round | Sticky) qualified by FOpCtrlE[0],
//              bits 3:1 are always zero
module fcvt (
    input  logic [63:0]   X,
    input  logic [64-1:0] SrcAE,
    input  logic [3:0]    FOpCtrlE,
    input  logic [2:0]    FrmE,
    input  logic          FmtE,
    output logic [63:0]   CvtResE,
    output logic [4:0]    CvtFlgE);

    logic [10:0]     XExp;
    logic [51:0]     XFrac;
    logic            XSgn;
    logic [10:0]     ResExp,TmpExp;
    logic [51:0]     ResFrac;
    logic            ResSgn;
    logic [10:0]     NormCnt;
    logic [11:0]     Bias; // 1023 for double, 127 for single
    logic [7:0]      Bits, SubBits;
    logic [64+51:0]  ShiftedManTmp;
    logic [64+51:0]  ShiftVal;
    logic [64+1:0]   ShiftedMan;
    logic [64:0]     RoundedTmp;
    logic [63:0]     Rounded;
    logic [12:0]     ExpVal, ShiftCnt;
    logic [64-1:0]   PosInt;

    logic [64-1:0]   CvtIntRes;
    logic [63:0]     CvtRes;
    logic            XFracZero, Of,Uf;
    logic            XExpMax;
    logic            XNaN, XDenorm, XInf, XZero;
    logic            Plus1,CalcPlus1, Guard, Round, LSB, Sticky;
    logic            SgnRes, In64;
    logic            Res64;
    logic            RoundMSB;
    logic            RoundSgn;
    logic            XExpZero;

    // fcvt.w.s  = 0010 -
    // fcvt.wu.s = 0110 -
    // fcvt.s.w  = 0001
    // fcvt.s.wu = 0101
    // fcvt.l.s  = 1010 -
    // fcvt.lu.s = 1110 -
    // fcvt.s.l  = 1001
    // fcvt.s.lu = 1101
    // fcvt.w.d  = 0010 -
    // fcvt.wu.d = 0110 -
    // fcvt.d.w  = 0001
    // fcvt.d.wu = 0101
    // fcvt.l.d  = 1010 -
    // fcvt.lu.d = 1110 -
    // fcvt.d.l  = 1001 --
    // fcvt.d.lu = 1101 --
    // {long, unsigned, to int, from int} Fmt controls the output for fp -> fp

    // Unpack the floating-point operand according to FmtE
    assign XSgn = X[63];
    assign XExp = FmtE ? X[62:52] : {3'b0, X[62:55]};
    assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
    assign XExpZero = ~|XExp;

    // Classify the floating-point operand
    assign XFracZero = ~|XFrac;
    assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign XNaN = XExpMax & ~XFracZero;
    assign XDenorm = XExpZero & ~XFracZero;
    assign XInf = XExpMax & XFracZero;
    assign XZero = XExpZero & XFracZero;


    assign Bias = FmtE ? 12'h3ff : 12'h7f;
    // Res64 / In64: whether the result / the input is treated as 64 bits wide
    assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
    assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
    assign SubBits = In64 ? 8'd64 : 8'd32;
    assign Bits = Res64 ? 8'd64 : 8'd32;
    // Unbiased exponent; a denormal input is treated as having exponent 1
    assign ExpVal = XExp - Bias + XDenorm;

    ////////////////////////////////////////////////////////
    // Integer source: left-justify 32-bit inputs, take the magnitude of signed
    // negative inputs, and remember the sign for the floating-point result.

    logic [64-1:0] IntIn;
    assign IntIn = FOpCtrlE[3] ? SrcAE : {SrcAE[31:0], 32'b0};
    assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
    assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;

    // Leading one detector.
    // i counts the zero bits above the first set bit of PosInt and ends at 64
    // when PosInt is all zeros. The bound is tested first and is strictly less
    // than 64 so that the bit select never goes below PosInt[0]; the previous
    // `i <= 64` form selected PosInt[-1] for an all-zero input and only stopped
    // because that out-of-range select evaluates to x.
    logic [8:0] i;
    always_comb begin
        i = 0;
        while ((i < 64) && ~PosInt[64-1-i]) i = i+1; // search for leading one
        NormCnt = i+1; // compute shift count
    end
    // An all-zero integer (i == 64) produces a zero exponent
    assign TmpExp = i==64 ? 0 : Bias + SubBits - NormCnt;

    ////////////////////////////////////////////
    // Shared shifter: float->int shifts the mantissa by the exponent value,
    // int->float shifts the magnitude by the normalization count.

    assign ShiftCnt = FOpCtrlE[1] ? ExpVal : NormCnt;
    assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
    // if shift = -1 then shift one bit right for round to nearest (shift over 2 never rounds)
    // if the shift is negative add bit for sticky bit
    // otherwise shift left
    assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {115'b0, ~XZero} : ShiftVal << ShiftCnt;

    assign ShiftedMan = ShiftedManTmp[64+51:50];
    assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);


    // determine guard, round, and least significant bit of the result
    assign Guard = FOpCtrlE[1] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
    assign Round = FOpCtrlE[1] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
    assign LSB = FOpCtrlE[1] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];

    always_comb begin
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
            3'b010: CalcPlus1 = (XSgn&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
            3'b011: CalcPlus1 = (~XSgn&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
            3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
    end

    // Only round up when something was actually discarded, and never for a
    // zero floating-point input being converted to an integer
    assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);

    assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
    assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;

    // NOTE(review): the 64-bit branch negates on XSgn&FOpCtrlE[1] while the
    // 32-bit branch negates on XSgn alone - confirm this asymmetry is intended.
    assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
                             XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
    assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
    assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];



    // Choose result
    // double to unsigned long
    // >2^64-1 or +inf or NaN - all 1's
    // <0 or -inf - zero
    // otherwise rounded result
    //assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
    assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
    assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
    assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
    // Saturate on overflow/underflow, otherwise use the rounded integer
    assign CvtIntRes = Of ? FOpCtrlE[2] ? SgnRes ? {32'b0, {32{1'b1}}}: {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} :
                       Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
                       Rounded[64-1:0];

    assign CvtRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
    assign CvtResE = FOpCtrlE[0] ? CvtRes : CvtIntRes;
    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};

endmodule // fcvt
|
||||
|
||||
|
@ -1,3 +1,231 @@
|
||||
// fma: top-level wrapper for the fused multiply-add unit.
// It instantiates fma1 on the E-suffixed operands, carries fma1's outputs
// through four flopenrc instances into their M-suffixed copies, and
// instantiates fma2 on those copies to produce FMAResM and FMAFlgM.
module fma(
    input  logic        clk,
    input  logic        reset,
    input  logic        FlushM,
    input  logic        StallM,
    input  logic [63:0] SrcXE, SrcXM,       // X
    input  logic [63:0] SrcYE, SrcYM,       // Y
    input  logic [63:0] SrcZE, SrcZM,       // Z
    input  logic        FmtE, FmtM,         // precision 1 = double 0 = single
    input  logic [2:0]  FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic [2:0]  FrmM,               // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
    output logic [63:0] FMAResM,
    output logic [4:0]  FMAFlgM);

    // fma1 outputs (E) and their flopenrc-carried copies (M)
    logic [105:0] ProdManE;
    logic [105:0] ProdManM;
    logic [161:0] AlignedAddendE;
    logic [161:0] AlignedAddendM;
    logic [12:0]  ProdExpE;
    logic [12:0]  ProdExpM;
    logic         AddendStickyE;
    logic         AddendStickyM;
    logic         KillProdE;
    logic         KillProdM;

    // special-value indicators for each operand, E and M copies
    logic         XZeroE, YZeroE, ZZeroE;
    logic         XZeroM, YZeroM, ZZeroM;
    logic         XInfE, YInfE, ZInfE;
    logic         XInfM, YInfM, ZInfM;
    logic         XNaNE, YNaNE, ZNaNE;
    logic         XNaNM, YNaNM, ZNaNM;

    // first half: operates on the E-suffixed sources
    fma1 fma1 (
        .X(SrcXE),
        .Y(SrcYE),
        .Z(SrcZE),
        .FOpCtrlE(FOpCtrlE),
        .FmtE(FmtE),
        .ProdManE(ProdManE),
        .AlignedAddendE(AlignedAddendE),
        .ProdExpE(ProdExpE),
        .AddendStickyE(AddendStickyE),
        .KillProdE(KillProdE),
        .XZeroE(XZeroE), .YZeroE(YZeroE), .ZZeroE(ZZeroE),
        .XInfE(XInfE),   .YInfE(YInfE),   .ZInfE(ZInfE),
        .XNaNE(XNaNE),   .YNaNE(YNaNE),   .ZNaNE(ZNaNE));

    // E -> M carry. FlushM and ~StallM are the third and fourth positional
    // arguments of flopenrc (presumably clear and enable - confirm against
    // the flopenrc definition).
    flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
    flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
    flopenrc #(13)  EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
    flopenrc #(11)  EMRegFma4(clk, reset, FlushM, ~StallM,
        {AddendStickyE, KillProdE,
         XZeroE, YZeroE, ZZeroE,
         XInfE, YInfE, ZInfE,
         XNaNE, YNaNE, ZNaNE},
        {AddendStickyM, KillProdM,
         XZeroM, YZeroM, ZZeroM,
         XInfM, YInfM, ZInfM,
         XNaNM, YNaNM, ZNaNM});

    // second half: operates on the M-suffixed sources and carried signals
    fma2 fma2 (
        .X(SrcXM),
        .Y(SrcYM),
        .Z(SrcZM),
        .FOpCtrlM(FOpCtrlM),
        .FrmM(FrmM),
        .FmtM(FmtM),
        .ProdManM(ProdManM),
        .AlignedAddendM(AlignedAddendM),
        .ProdExpM(ProdExpM),
        .AddendStickyM(AddendStickyM),
        .KillProdM(KillProdM),
        .XZeroM(XZeroM), .YZeroM(YZeroM), .ZZeroM(ZZeroM),
        .XInfM(XInfM),   .YInfM(YInfM),   .ZInfM(ZInfM),
        .XNaNM(XNaNM),   .YNaNM(YNaNM),   .ZNaNM(ZNaNM),
        .FMAResM(FMAResM),
        .FMAFlgM(FMAFlgM));

endmodule
|
||||
|
||||
|
||||
|
||||
// fma1: first half of the fused multiply-add.
// Unpacks X, Y and Z (Z is replaced by zero when FOpCtrlE[2] is set), flags
// zero/infinity/NaN inputs, computes the product exponent and the 106-bit
// product of the two mantissas, and aligns the addend mantissa against the
// product, producing AlignedAddendE, AddendStickyE and KillProdE.
module fma1(
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
|
||||
output logic [161:0] AlignedAddendE, // Z aligned for addition
|
||||
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
|
||||
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
|
||||
output logic XInfE, YInfE, ZInfE, // inputs are infinity
|
||||
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
|
||||
|
||||
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
|
||||
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
|
||||
logic [12:0] XExp,YExp,ZExp; // input exponents
|
||||
logic XSgn,YSgn,ZSgn; // input signs (assigned below but not read anywhere in this module)
|
||||
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
|
||||
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [213:0] ZManPreShifted; // input to the alignment shifter
|
||||
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
|
||||
logic [63:0] Addend; // value to add (Z or zero)
|
||||
logic [12:0] Bias; // 1023 for double, 127 for single
|
||||
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
|
||||
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
|
||||
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63];
|
||||
|
||||
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
|
||||
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
|
||||
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
|
||||
|
||||
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
|
||||
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
|
||||
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
|
||||
|
||||
// The leading mantissa bit is 1 unless the exponent field is zero (zero or denormal input)
assign XMan = {~XExpZero, XFrac};
|
||||
assign YMan = {~YExpZero, YFrac};
|
||||
assign ZMan = {~ZExpZero, ZFrac};
|
||||
|
||||
assign Bias = FmtE ? 13'h3ff : 13'h7f;
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// determine if an input is a special value
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign XExpZero = ~|XExp;
|
||||
assign YExpZero = ~|YExp;
|
||||
assign ZExpZero = ~|ZExp;
|
||||
|
||||
assign XFracZero = ~|XFrac;
|
||||
assign YFracZero = ~|YFrac;
|
||||
assign ZFracZero = ~|ZFrac;
|
||||
|
||||
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
|
||||
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
|
||||
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
|
||||
|
||||
assign XNaNE = XExpMax & ~XFracZero;
|
||||
assign YNaNE = YExpMax & ~YFracZero;
|
||||
assign ZNaNE = ZExpMax & ~ZFracZero;
|
||||
|
||||
assign XDenorm = XExpZero & ~XFracZero;
|
||||
assign YDenorm = YExpZero & ~YFracZero;
|
||||
assign ZDenorm = ZExpZero & ~ZFracZero;
|
||||
|
||||
assign XInfE = XExpMax & XFracZero;
|
||||
assign YInfE = YExpMax & YFracZero;
|
||||
assign ZInfE = ZExpMax & ZFracZero;
|
||||
|
||||
assign XZeroE = XExpZero & XFracZero;
|
||||
assign YZeroE = YExpZero & YFracZero;
|
||||
assign ZZeroE = ZExpZero & ZFracZero;
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multiplying two fp numbers, add the exponents
|
||||
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one if there is a denormal number
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// verilator lint_off WIDTH
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
XExp + YExp - Bias + XDenorm + YDenorm;
|
||||
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = XMan * YMan;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine the shift count for alignment
|
||||
// - negative means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
|
||||
// verilator lint_on WIDTH
|
||||
|
||||
|
||||
// Default Addition without shifting
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// |1'b0| addend |
|
||||
|
||||
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
assign ZManPreShifted = {55'b0, ZMan, 106'b0};
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// If the product is too small to affect the sum, kill the product
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
|
||||
KillProdE = 1;
|
||||
ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
|
||||
AddendStickyE = ~(XZeroE|YZeroE);
|
||||
|
||||
// If the Addend is shifted left (negative AlignCnt)
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted << -AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the Addend is shifted right (positive AlignCnt)
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted >> AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the addend is too small to affect the addition
|
||||
// - The addend has to shift two past the end of the addend to be considered too small
|
||||
// - The 2 extra bits are needed for rounding
|
||||
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addend |
|
||||
end else begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = 0;
|
||||
AddendStickyE = ~ZZeroE;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// The low 52 bits of the shifter output only feed the sticky bit; the rest is the aligned addend
assign AlignedAddendE = ZManShifted[213:52];
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module fma2(
|
@ -1,184 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
// fma1 - first (execute) stage of the fused multiply-add unit
//
// Unpacks X, Y and Z into sign / exponent / mantissa, classifies each operand
// (zero, denormal, infinity, NaN), forms the product X*Y (mantissa and
// exponent) and aligns the addend Z to that product.
//
// Single-precision operands are expected in the upper 32 bits of each
// 64-bit input.  All logic in this module is combinational.
///////////////////////////////////////////////////////////////////////////////
module fma1(
    input  logic [63:0]  X,                      // X
    input  logic [63:0]  Y,                      // Y
    input  logic [63:0]  Z,                      // Z
    input  logic [2:0]   FOpCtrlE,               // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic         FmtE,                   // precision 1 = double 0 = single
    output logic [105:0] ProdManE,               // 1.X frac * 1.Y frac
    output logic [161:0] AlignedAddendE,         // Z aligned for addition
    output logic [12:0]  ProdExpE,               // X exponent + Y exponent - bias
    output logic         AddendStickyE,          // sticky bit that is calculated during alignment
    output logic         KillProdE,              // set the product to zero before addition if the product is too small to matter
    output logic         XZeroE, YZeroE, ZZeroE, // inputs are zero
    output logic         XInfE, YInfE, ZInfE,    // inputs are infinity
    output logic         XNaNE, YNaNE, ZNaNE);   // inputs are NaN

    // ------------------------------------------------------------------
    // Internal signals
    // ------------------------------------------------------------------
    logic [63:0]  Addend;                           // Z, or zero for fmul
    logic [12:0]  Bias;                             // 1023 for double, 127 for single

    logic         XSgn, YSgn, ZSgn;                 // sign bits
    logic [12:0]  XExp, YExp, ZExp;                 // zero-extended biased exponents
    logic [51:0]  XFrac, YFrac, ZFrac;              // fraction fields (single is left-justified)
    logic [52:0]  XMan, YMan, ZMan;                 // fraction with the leading one prepended

    logic         XExpZero, YExpZero, ZExpZero;     // exponent field is all zeros
    logic         XExpMax, YExpMax, ZExpMax;        // exponent field is all ones
    logic         XFracZero, YFracZero, ZFracZero;  // fraction field is all zeros
    logic         XDenorm, YDenorm, ZDenorm;        // operand is denormal

    logic [12:0]  AlignCnt;                         // addend shift relative to the product
    logic [213:0] ZManPreShifted;                   // addend placed in the shifter before shifting
    logic [213:0] ZManShifted;                      // shifter output; bits [51:0] only feed the sticky bit
    logic         ProdNegligible;                   // AlignCnt <= -56 (signed)
    logic         AddendShiftsLeft;                 // AlignCnt <=   0 (signed)
    logic         AddendShiftsRight;                // AlignCnt <= 106 (signed)

    // ------------------------------------------------------------------
    // Operand unpacking
    //   double: sign [63], exponent [62:52], fraction [51:0]
    //   single: sign [63], exponent [62:55], fraction [54:32]
    // ------------------------------------------------------------------

    // An fmul has no addend, so treat it as X*Y + 0.
    assign Addend = FOpCtrlE[2] ? 64'b0 : Z;

    assign Bias = FmtE ? 13'h3ff : 13'h7f;

    assign {XSgn, YSgn, ZSgn} = {X[63], Y[63], Addend[63]};

    assign XExp = FmtE ? {2'b0, X[62:52]}      : {5'b0, X[62:55]};
    assign YExp = FmtE ? {2'b0, Y[62:52]}      : {5'b0, Y[62:55]};
    assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};

    assign XFrac = FmtE ? X[51:0]      : {X[54:32], 29'b0};
    assign YFrac = FmtE ? Y[51:0]      : {Y[54:32], 29'b0};
    assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};

    // The leading one is present only when the exponent field is non-zero.
    assign XMan = {~XExpZero, XFrac};
    assign YMan = {~YExpZero, YFrac};
    assign ZMan = {~ZExpZero, ZFrac};

    // ------------------------------------------------------------------
    // Operand classification
    // ------------------------------------------------------------------
    assign XExpZero  = ~|XExp;
    assign YExpZero  = ~|YExp;
    assign ZExpZero  = ~|ZExp;

    assign XExpMax   = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign YExpMax   = FmtE ? &YExp[10:0] : &YExp[7:0];
    assign ZExpMax   = FmtE ? &ZExp[10:0] : &ZExp[7:0];

    assign XFracZero = ~|XFrac;
    assign YFracZero = ~|YFrac;
    assign ZFracZero = ~|ZFrac;

    // exponent all zeros: zero (fraction zero) or denormal (fraction non-zero)
    assign XZeroE  = XExpZero & XFracZero;
    assign YZeroE  = YExpZero & YFracZero;
    assign ZZeroE  = ZExpZero & ZFracZero;

    assign XDenorm = XExpZero & ~XFracZero;
    assign YDenorm = YExpZero & ~YFracZero;
    assign ZDenorm = ZExpZero & ~ZFracZero;

    // exponent all ones: infinity (fraction zero) or NaN (fraction non-zero)
    assign XInfE   = XExpMax & XFracZero;
    assign YInfE   = YExpMax & YFracZero;
    assign ZInfE   = ZExpMax & ZFracZero;

    assign XNaNE   = XExpMax & ~XFracZero;
    assign YNaNE   = YExpMax & ~YFracZero;
    assign ZNaNE   = ZExpMax & ~ZFracZero;

    // ------------------------------------------------------------------
    // Product
    //   - The exponents add; one bias is subtracted because the sum of two
    //     biased exponents carries the bias twice.
    //   - A denormal is stored with exponent 0 but has an effective
    //     exponent of 1, so one is added per denormal operand.
    //   - A zero operand forces the product exponent to zero.
    // ------------------------------------------------------------------

    // verilator lint_off WIDTH
    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
                      XExp + YExp - Bias + XDenorm + YDenorm;

    // 53 x 53 -> 106-bit mantissa product
    assign ProdManE = XMan * YMan;

    // ------------------------------------------------------------------
    // Addend alignment
    //   AlignCnt is the product exponent minus the effective addend
    //   exponent, interpreted as a signed value:
    //     negative -> Z is larger, Z moves left
    //     positive -> the product is larger, Z moves right
    // ------------------------------------------------------------------
    assign AlignCnt = ProdExpE - ZExp - ZDenorm;
    // verilator lint_on WIDTH

    // Unshifted placement of the addend inside the 214-bit shifter:
    // 55 zero bits, the 53-bit mantissa, then 106 zero bits.
    assign ZManPreShifted = {55'b0, ZMan, 106'b0};

    assign ProdNegligible    = $signed(AlignCnt) <= $signed(-13'd56);
    assign AddendShiftsLeft  = $signed(AlignCnt) <= $signed(13'd0);
    assign AddendShiftsRight = $signed(AlignCnt) <= $signed(13'd106);

    always_comb begin
        if (ProdNegligible) begin
            // Product is too small to affect the sum: kill it and leave the
            // addend unshifted.  Any non-zero product still counts as sticky.
            ZManShifted   = ZManPreShifted;
            KillProdE     = 1;
            AddendStickyE = ~(XZeroE|YZeroE);
        end else if (AddendShiftsLeft) begin
            // Addend is at least as large as the product: shift it left.
            ZManShifted   = ZManPreShifted << -AlignCnt;
            KillProdE     = 0;
            AddendStickyE = |(ZManShifted[51:0]);
        end else if (AddendShiftsRight) begin
            // Product is larger: shift the addend right; bits that fall below
            // bit 52 are collected into the sticky bit.
            ZManShifted   = ZManPreShifted >> AlignCnt;
            KillProdE     = 0;
            AddendStickyE = |(ZManShifted[51:0]);
        end else begin
            // Addend is shifted entirely out of the kept range: it only
            // contributes a sticky bit (when it is non-zero).
            ZManShifted   = 0;
            KillProdE     = 0;
            AddendStickyE = ~ZZeroE;
        end
    end

    // Drop the 52 low-order bits that were only used for the sticky bit.
    assign AlignedAddendE = ZManShifted[213:52];

endmodule
|
@ -45,7 +45,7 @@ module fpu (
|
||||
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
|
||||
|
||||
generate
|
||||
if (`F_SUPPORTED) begin
|
||||
if (`F_SUPPORTED | `D_SUPPORTED) begin
|
||||
// control logic signal instantiation
|
||||
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
|
||||
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
|
||||
@ -75,39 +75,15 @@ module fpu (
|
||||
logic [63:0] DivInput1E, DivInput2E;
|
||||
logic HoldInputs; // keep forwarded inputs arround durring division
|
||||
|
||||
// FMA signals
|
||||
logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units
|
||||
logic [161:0] AlignedAddendE, AlignedAddendM;
|
||||
logic [12:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
|
||||
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
|
||||
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
|
||||
//fpu signals
|
||||
logic [63:0] FMAResM, FMAResW;
|
||||
logic [4:0] FMAFlgM, FMAFlgW;
|
||||
|
||||
// add/cvt signals
|
||||
logic [63:0] AddSumE, AddSumM;
|
||||
logic [63:0] AddSumTcE, AddSumTcM;
|
||||
logic [3:0] AddSelInvE, AddSelInvM;
|
||||
logic [10:0] AddExpPostSumE,AddExpPostSumM;
|
||||
logic AddCorrSignE, AddCorrSignM;
|
||||
logic AddOp1NormE, AddOp1NormM;
|
||||
logic AddOp2NormE, AddOp2NormM;
|
||||
logic AddOpANormE, AddOpANormM;
|
||||
logic AddOpBNormE, AddOpBNormM;
|
||||
logic AddInvalidE, AddInvalidM;
|
||||
logic AddDenormInE, AddDenormInM;
|
||||
logic AddSwapE, AddSwapM;
|
||||
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
|
||||
logic AddSignAE, AddSignAM;
|
||||
logic AddConvertE, AddConvertM;
|
||||
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
|
||||
logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
|
||||
logic [10:0] AddExponentE, AddExponentM;
|
||||
logic [63:0] FAddResM, FAddResW;
|
||||
logic [4:0] FAddFlgM, FAddFlgW;
|
||||
logic [63:0] CvtResE, CvtResM;
|
||||
logic [4:0] CvtFlgE, CvtFlgM;
|
||||
|
||||
// cmp signals
|
||||
logic CmpNVE, CmpNVM, CmpNVW;
|
||||
@ -117,7 +93,7 @@ module fpu (
|
||||
logic [63:0] SgnResE, SgnResM;
|
||||
logic SgnNVE, SgnNVM, SgnNVW;
|
||||
logic [63:0] FResM, FResW;
|
||||
logic FFlgM, FFlgW;
|
||||
logic [4:0] FFlgM, FFlgW;
|
||||
|
||||
// instantiation of W stage regfile signals
|
||||
logic [63:0] AlignedSrcAM;
|
||||
@ -198,9 +174,10 @@ module fpu (
|
||||
|
||||
|
||||
// first of two-stage instance of floating-point fused multiply-add unit
|
||||
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XNaNE, .YNaNE, .ZNaNE );
|
||||
fma fma (.clk, .reset, .FlushM, .StallM,
|
||||
.SrcXE, .SrcYE, .SrcZE, .SrcXM, .SrcYM, .SrcZM,
|
||||
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
|
||||
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
|
||||
|
||||
// first and only instance of floating-point divider
|
||||
logic fpdivClk;
|
||||
@ -225,10 +202,8 @@ module fpu (
|
||||
|
||||
|
||||
// first of two-stage instance of floating-point add/cvt unit
|
||||
fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
|
||||
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
|
||||
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
|
||||
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
|
||||
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
|
||||
.SrcXE, .SrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
|
||||
|
||||
// first and only instance of floating-point comparator
|
||||
fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE);
|
||||
@ -239,6 +214,9 @@ module fpu (
|
||||
// first and only instance of floating-point classify unit
|
||||
fclassify fclassify (.SrcXE, .FmtE, .ClassResE);
|
||||
|
||||
|
||||
fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
|
||||
|
||||
// output for store instructions
|
||||
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
|
||||
//***swap to mux
|
||||
@ -259,31 +237,16 @@ module fpu (
|
||||
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
|
||||
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
|
||||
|
||||
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE},
|
||||
{AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM});
|
||||
|
||||
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
|
||||
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
|
||||
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
|
||||
flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
|
||||
flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
|
||||
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
|
||||
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
|
||||
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
|
||||
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
|
||||
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
|
||||
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
|
||||
|
||||
|
||||
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
|
||||
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
|
||||
|
||||
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
|
||||
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
|
||||
|
||||
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
|
||||
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
|
||||
|
||||
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
|
||||
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
|
||||
{FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
|
||||
@ -299,29 +262,27 @@ module fpu (
|
||||
|
||||
//BEGIN MEMORY STAGE
|
||||
|
||||
mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM);
|
||||
mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM);
|
||||
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
|
||||
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
|
||||
|
||||
//***change to mux
|
||||
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
|
||||
mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
|
||||
// second instance of two-stage FMA unit
|
||||
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM,
|
||||
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM,
|
||||
.FMAResM, .FMAFlgM);
|
||||
|
||||
// second instance of two-stage floating-point add/cvt unit
|
||||
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
|
||||
.AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM,
|
||||
.AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
|
||||
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
|
||||
|
||||
// Align SrcA to MSB when single precision
|
||||
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
|
||||
|
||||
|
||||
always_comb begin
|
||||
case (FResultSelM)
|
||||
3'b000 : SetFflagsM = 5'b0;
|
||||
3'b001 : SetFflagsM = FMAFlgM;
|
||||
3'b010 : SetFflagsM = FAddFlgM;
|
||||
3'b011 : SetFflagsM = FDivSqrtFlgM;
|
||||
3'b100 : SetFflagsM = FFlgM;
|
||||
default : SetFflagsM = 5'bxxxxx;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
|
||||
@ -334,19 +295,14 @@ module fpu (
|
||||
// M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW);
|
||||
|
||||
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
|
||||
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW);
|
||||
|
||||
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
|
||||
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW);
|
||||
|
||||
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW);
|
||||
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
|
||||
|
||||
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
|
||||
|
||||
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
|
||||
@ -363,20 +319,6 @@ module fpu (
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//***turn into muxs
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
3'b000 : FPUFlagsW = 5'b0;
|
||||
3'b001 : FPUFlagsW = FMAFlgW;
|
||||
3'b010 : FPUFlagsW = FAddFlgW;
|
||||
3'b011 : FPUFlagsW = FDivSqrtFlgW;
|
||||
3'b100 : FPUFlagsW = {4'b0,FFlgW};
|
||||
default : FPUFlagsW = 5'bxxxxx;
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
|
||||
@ -393,13 +335,11 @@ module fpu (
|
||||
// floating-point results
|
||||
//
|
||||
// define offsets for LSB zero extension or truncation
|
||||
always_comb begin
|
||||
// zero extension
|
||||
//***turn into mux
|
||||
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
|
||||
//*** put into mem stage
|
||||
SetFflagsM = FPUFlagsW;
|
||||
end
|
||||
always_comb begin
|
||||
// zero extension
|
||||
//***turn into mux
|
||||
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
|
||||
end
|
||||
end else begin // no F_SUPPORTED; tie outputs low
|
||||
assign FStallD = 0;
|
||||
assign FWriteIntE = 0;
|
||||
|
@ -1,198 +0,0 @@
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
// Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
//   Step 3: Compare exponents.  Swap the operands if exp1 < exp2
|
||||
//           or if (exp1 = exp2 AND mnt1 < mnt2)
|
||||
// Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
//           left of the binary point is 1.
|
||||
// Step 7: Round the result.//
|
||||
// Step 8: Put sum onto output.
|
||||
//
|
||||
|
||||
|
||||
// fpuaddcvt1 - first stage of the two-stage floating-point add / convert unit.
//
// Converts the operands to double-precision form, checks for exceptional
// inputs, orders the operands by exponent, aligns the smaller mantissa and
// produces both the mantissa sum and the reversed difference
// (mantissaB3 - mantissaA3).  All logic here is combinational.
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
                   AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE,
                   AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE,
                   AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE,
                   SrcXE, SrcYE, FOpCtrlE, FmtE);

   // ---------------------------------------------------------------
   // Inputs
   // ---------------------------------------------------------------
   input logic [63:0] SrcXE;        // 1st input operand (A)
   input logic [63:0] SrcYE;        // 2nd input operand (B)
   input logic [3:0]  FOpCtrlE;     // Function opcode
   input logic        FmtE;         // Result Precision (1 for double, 0 for single)

   // ---------------------------------------------------------------
   // Outputs
   // ---------------------------------------------------------------
   output logic [63:0] AddFloat1E;                      // operand A after input conversion
   output logic [63:0] AddFloat2E;                      // operand B after input conversion
   output logic [10:0] AddExponentE;                    // exponent of the operand chosen as A
   output logic [10:0] AddExpPostSumE;                  // same value as AddExponentE
   output logic [11:0] AddExp1DenormE, AddExp2DenormE;  // exponents minus leading-zero counts
   output logic [63:0] AddSumE, AddSumTcE;              // adder result / reversed difference
   output logic [3:0]  AddSelInvE;                      // result select produced by the exception unit
   output logic        AddCorrSignE;                    // sign needs correcting (subtract with swapped operands)
   output logic        AddSignAE;                       // sign of the operand chosen as A
   output logic        AddOp1NormE, AddOp2NormE;        // operands are neither zero nor denormal
   output logic        AddOpANormE, AddOpBNormE;        // the same flags after the swap
   output logic        AddInvalidE;                     // invalid-operation flag
   output logic        AddDenormInE;                    // denormalized-input flag
   output logic        AddConvertE;                     // integer to floating-point conversion
   output logic        AddSwapE;                        // operands were swapped
   output logic        AddNormOvflowE;

   // ---------------------------------------------------------------
   // Internal nets
   // ---------------------------------------------------------------
   wire        P;                        // precision select passed to convert_inputs
   wire        sub;                      // effective operation is a subtraction
   wire        zeroB;                    // operand B is forced to zero
   wire [11:0] exp1, exp2;               // zero-extended exponents
   wire [11:0] exp_diff1, exp_diff2;     // exp1 - exp2 and exp2 - exp1
   wire [11:0] exp_shift;                // selected exponent difference
   wire        exp_gt63;                 // alignment shift exceeds 63
   wire [5:0]  align_shift;              // alignment shift, saturated at 63
   wire [51:0] mantissaA, mantissaB;     // fractions after the swap
   wire [56:0] mantissaA1;               // unpacked A
   wire [56:0] mantissaB1, mantissaB2;   // unpacked B before / after alignment
   wire [63:0] mantissaA3, mantissaB3;   // adder operands
   wire        Sticky_out;               // sticky output of the alignment shifter
   wire [63:0] IntValue;                 // integer source for int-to-fp conversion
   wire [5:0]  ZP_mantissaA, ZP_mantissaB; // leading-zero counts
   wire        ZV_mantissaA, ZV_mantissaB; // leading-zero valid flags

   assign P = ~FmtE | FOpCtrlE[2];

   // Bring both operands into the form used by the datapath, based on the
   // original operands, the opcode and the precision P.
   convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);

   // Exception detection: drives the invalid and denormal-input flags, the
   // result select, the per-operand "normal" flags and the effective
   // subtract indication.
   exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
                   AddFloat1E, AddFloat2E, FOpCtrlE);

   // ---------------------------------------------------------------
   // Exponent comparison.  Both differences are computed in parallel on
   // exponents zero-extended to 12 bits.
   // ---------------------------------------------------------------
   assign exp1 = {1'b0, AddFloat1E[62:52]};
   assign exp2 = {1'b0, AddFloat2E[62:52]};

   assign exp_diff1 = exp1 - exp2;
   assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]})
                                   : exp2 - exp1;

   // B is not used when the opcode is neither an add nor a subtract.
   assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];

   // Swap when exp1 < exp2 (and B is in use) so that the operand with the
   // larger exponent becomes A and supplies the result exponent.
   assign AddSwapE       = exp_diff1[11] & ~zeroB;
   assign AddExponentE   = AddSwapE ? exp2[10:0] : exp1[10:0];
   assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
   assign AddSignAE      = AddSwapE ? AddFloat2E[63]   : AddFloat1E[63];
   assign mantissaA      = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
   assign mantissaB      = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
   assign AddOpANormE    = AddSwapE ? AddOp2NormE : AddOp1NormE;
   assign AddOpBNormE    = AddSwapE ? AddOp1NormE : AddOp2NormE;

   // ---------------------------------------------------------------
   // Leading-zero detection on the 52-bit fractions, used to form the
   // exponents of denormalized operands.
   // ---------------------------------------------------------------
   lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
   lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);

   assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA});
   assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});

   // ---------------------------------------------------------------
   // Alignment shift amount, saturated at 63: if any of exp_shift[11:6]
   // is set, every bit of the shift amount is forced to one.
   // ---------------------------------------------------------------
   assign exp_shift   = AddSwapE ? exp_diff2 : exp_diff1;
   assign exp_gt63    = |exp_shift[11:6];
   assign align_shift = exp_shift[5:0] | {6{exp_gt63}};

   // ---------------------------------------------------------------
   // Unpack the fractions to 57 bits: 001.M[51:0]00 for a normal operand,
   // all zeros otherwise (denormals are flushed to zero here).  The bits
   // left of the binary point guard against overflow and sign loss; the
   // two low bits are the guard and round positions.
   // ---------------------------------------------------------------
   assign mantissaA1 = {2'h0, AddOpANormE, mantissaA & {52{AddOpANormE}}, 2'h0};
   assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB & {52{AddOpBNormE}}, 2'h0};

   // Right-align B; Sticky_out is set when a one is shifted out.
   barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);

   // ---------------------------------------------------------------
   // Integer source operand: the low word sign-extended when FOpCtrlE[0]
   // is set, otherwise the full 64-bit value.
   // ---------------------------------------------------------------
   assign IntValue = {(FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32]), SrcXE[31:0]};

   assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];

   // Adder operand A, in priority order:
   //   FOpCtrlE[3]   -> AddFloat1E, or its complement when FOpCtrlE[0] is clear
   //   denormal case -> raw fraction, zero-extended
   //   conversion    -> the integer value
   //   otherwise     -> unpacked mantissa with seven zero LSBs appended
   assign mantissaA3 = FOpCtrlE[3] ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E)
                     : AddDenormInE ? {12'h0, mantissaA}
                     : AddConvertE  ? IntValue
                     :                {mantissaA1, 7'h0};

   // Adder operand B, in priority order:
   //   FOpCtrlE[3]   -> the constant 1
   //   denormal case -> raw fraction, zero-extended
   //   otherwise     -> aligned mantissa, the sticky bit, then six zeros
   //                    (all forced to zero when zeroB is set)
   assign mantissaB3[63:7] = FOpCtrlE[3] ? 57'h0
                           : AddDenormInE ? {12'h0, mantissaB[51:7]}
                           :                mantissaB2 & {57{~zeroB}};
   assign mantissaB3[6]    = FOpCtrlE[3] ? 1'b0
                           : AddDenormInE ? mantissaB[6]
                           :                Sticky_out & ~zeroB;
   assign mantissaB3[5:0]  = FOpCtrlE[3] ? 6'h01
                           : AddDenormInE ? mantissaB[5:0]
                           :                6'h0;

   // The result sign must be corrected when the operation is a true
   // subtraction (FOpCtrlE[2:0] == 3'b001) and the operands were swapped.
   assign AddCorrSignE = AddSwapE & FOpCtrlE[0] & ~(FOpCtrlE[2] | FOpCtrlE[1]);

   // 64-bit mantissa adder/subtractor
   cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub);

   // Reversed subtraction, giving the two's complement of the result for
   // the case where the adder/subtractor output is negative.
   cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3);

   // Used downstream to decide whether the final exponent is rounded down.
   // Forced high for a denormal input whose sum is zero while at least one
   // operand is normal and FOpCtrlE[0] is clear; otherwise bit 52 of
   // whichever sum is non-negative.
   assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0])
                         ? 1'b1
                         : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);

endmodule // fpadd
|
||||
|
||||
|
@ -1,163 +0,0 @@
|
||||
//
|
||||
// File name : fpadd
|
||||
// Title : Floating-Point Adder/Subtractor
|
||||
// project : FPU
|
||||
// Library : fpadd
|
||||
// Author(s) : James E. Stine, Jr., Brett Mathis
|
||||
// Purpose : definition of main unit to floating-point add/sub
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
// Copyright AFRL
|
||||
//
|
||||
// Basic and Denormalized Operations
|
||||
//
|
||||
//   Step 1: Load operands, set flags, and convert SP to DP
|
||||
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
||||
//   Step 3: Compare exponents.  Swap the operands if exp1 < exp2
|
||||
//           or if (exp1 = exp2 AND mnt1 < mnt2)
|
||||
//   Step 4: Shift the mantissa corresponding to the smaller exponent,
|
||||
// and extend precision by three bits to the right.
|
||||
// Step 5: Add or subtract the mantissas.
|
||||
// Step 6: Normalize the result.//
|
||||
// Shift left until normalized. Normalized when the value to the
|
||||
//           left of the binary point is 1.
|
||||
// Step 7: Round the result.//
|
||||
//   Step 8: Put sum onto output.
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
|
||||
|
||||
input [2:0] FrmM; // Rounding mode - specify values
|
||||
input [3:0] FOpCtrlM; // Function opcode
|
||||
input FmtM; // Result Precision (0 for double, 1 for single)
|
||||
// input AddOvEnM; // Overflow trap enabled
|
||||
// input AddUnEnM; // Underflow trap enabled
|
||||
input [63:0] AddSumM, AddSumTcM;
|
||||
input [63:0] AddFloat1M;
|
||||
input [63:0] AddFloat2M;
|
||||
input [11:0] AddExp1DenormM, AddExp2DenormM;
|
||||
input [10:0] AddExponentM, AddExpPostSumM; //exp_pre;
|
||||
//input exp_valid;
|
||||
input [3:0] AddSelInvM;
|
||||
input AddOp1NormM, AddOp2NormM;
|
||||
input AddOpANormM, AddOpBNormM;
|
||||
input AddInvalidM;
|
||||
input AddDenormInM;
|
||||
input AddSignAM;
|
||||
input AddCorrSignM;
|
||||
input AddConvertM;
|
||||
input AddSwapM;
|
||||
// input AddNormOvflowM;
|
||||
|
||||
output [63:0] FAddResM; // Result of operation
|
||||
output [4:0] FAddFlgM; // IEEE exception flags
|
||||
wire AddDenormM; // AddDenormM on input or output
|
||||
|
||||
wire P;
|
||||
assign P = ~FmtM | FOpCtrlM[2];
|
||||
|
||||
wire [10:0] exp_pre;
|
||||
wire [63:0] Result;
|
||||
wire [63:0] sum_norm, sum_norm_w_bypass;
|
||||
wire [5:0] norm_shift, norm_shift_denorm;
|
||||
wire exp_valid;
|
||||
wire DenormIO;
|
||||
wire [4:0] FlagsIn;
|
||||
wire Sticky_out;
|
||||
wire sign_corr;
|
||||
wire zeroB;
|
||||
wire [10:0] AddExpPostSumM;
|
||||
wire mantissa_comp;
|
||||
wire mantissa_comp_sum;
|
||||
wire mantissa_comp_sum_tc;
|
||||
wire Float1_sum_comp;
|
||||
wire Float2_sum_comp;
|
||||
wire Float1_sum_tc_comp;
|
||||
wire Float2_sum_tc_comp;
|
||||
wire normal_underflow;
|
||||
wire [63:0] sum_corr;
|
||||
logic AddNormOvflowM;
|
||||
|
||||
|
||||
logic AddOvEnM; // Overflow trap enabled
|
||||
logic AddUnEnM; // Underflow trap enabled
|
||||
|
||||
assign AddOvEnM = 1'b1;
|
||||
assign AddUnEnM = 1'b1;
|
||||
//AddExponentM value pre-rounding with considerations for denormalized
|
||||
//cases/conversion cases
|
||||
assign exp_pre = AddDenormInM ?
|
||||
((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
|
||||
: (AddConvertM ? 11'b10000111100 : AddExponentM);
|
||||
|
||||
|
||||
// Finds normal underflow result to determine whether to round final AddExponentM down
|
||||
// Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor
|
||||
assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
|
||||
assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
|
||||
|
||||
// Determines the correct Float value to compare based on AddSwapM result
|
||||
assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
|
||||
assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;
|
||||
|
||||
// Determines the correct comparison result based on operation and sign of resulting AddSumM
|
||||
assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
|
||||
|
||||
// If the signs are different and both operands aren't denormalized
|
||||
// the normal underflow bit is needed and therefore updated.
|
||||
assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
|
||||
|
||||
// Determine the correct sign of the result
|
||||
assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];
|
||||
|
||||
// If the AddSumM is negative, use its two complement instead.
|
||||
// This value has to be 64-bits to correctly handle the
|
||||
// case 10...00
|
||||
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
|
||||
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
|
||||
|
||||
// Finds normal underflow result to determine whether to round final AddExponentM down
|
||||
//KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corr is all zeros, the exp_valid is
|
||||
// zero; otherwise, it is one.
|
||||
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
|
||||
|
||||
assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift);
|
||||
|
||||
// Barrel shifter used for normalization. It takes as inputs the
|
||||
// corrected AddSumM and the amount by which the AddSumM should
|
||||
// be left shifted. It outputs the normalized AddSumM.
|
||||
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
|
||||
|
||||
assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. If the result is a single precision number, the actual
|
||||
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
|
||||
// At this point, normalization has already been performed, so we know
|
||||
// exactly where the rounding point is. The rounding units also
|
||||
// handles special cases and set the exception flags.
|
||||
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
|
||||
// help in processor reservation station detection of load/stores. In
|
||||
// other words, the processor would like to know ahead of time that
|
||||
// if the result is an exception then don't load or store.
|
||||
rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
|
||||
AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
|
||||
AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
|
||||
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
assign FAddResM = Result;
|
||||
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -94,14 +94,14 @@ string tests32f[] = '{
|
||||
"rv64f/I-FSW-01", "2000",
|
||||
"rv64f/I-FCLASS-S-01", "2000",
|
||||
"rv64f/I-FADD-S-01", "2000",
|
||||
// "rv64f/I-FCVT-S-L-01", "2000",
|
||||
// "rv64f/I-FCVT-S-LU-01", "2000",
|
||||
// "rv64f/I-FCVT-S-W-01", "2000",
|
||||
// "rv64f/I-FCVT-S-WU-01", "2000",
|
||||
// "rv64f/I-FCVT-L-S-01", "2000",
|
||||
// "rv64f/I-FCVT-LU-S-01", "2000",
|
||||
// "rv64f/I-FCVT-W-S-01", "2000",
|
||||
// "rv64f/I-FCVT-WU-S-01", "2000",
|
||||
"rv64f/I-FCVT-S-L-01", "2000",
|
||||
"rv64f/I-FCVT-S-LU-01", "2000",
|
||||
"rv64f/I-FCVT-S-W-01", "2000",
|
||||
"rv64f/I-FCVT-S-WU-01", "2000",
|
||||
"rv64f/I-FCVT-L-S-01", "2000",
|
||||
"rv64f/I-FCVT-LU-S-01", "2000",
|
||||
"rv64f/I-FCVT-W-S-01", "2000",
|
||||
"rv64f/I-FCVT-WU-S-01", "2000",
|
||||
// "rv64f/I-FDIV-S-01", "2000",
|
||||
"rv64f/I-FEQ-S-01", "2000",
|
||||
"rv64f/I-FLE-S-01", "2000",
|
||||
@ -122,6 +122,16 @@ string tests32f[] = '{
|
||||
|
||||
string tests64d[] = '{
|
||||
// "rv64d/I-FDIV-D-01", "2000",
|
||||
"rv64d/I-FCVT-D-L-01", "2000",
|
||||
"rv64d/I-FCVT-D-LU-01", "2000",
|
||||
// "rv64d/I-FCVT-D-S-01", "2000", //the number to be converted is in the lower 32 bits need to change the test
|
||||
"rv64d/I-FCVT-D-W-01", "2000",
|
||||
"rv64d/I-FCVT-D-WU-01", "2000",
|
||||
"rv64d/I-FCVT-L-D-01", "2000",
|
||||
"rv64d/I-FCVT-LU-D-01", "2000",
|
||||
// "rv64d/I-FCVT-S-D-01", "2000", //the result is in the lower 32 bits needs to be changed in the imperas test
|
||||
"rv64d/I-FCVT-W-D-01", "2000",
|
||||
// "rv64d/I-FCVT-WU-D-01", "2000", //this test needs to be fixed it expects 2^64-1 rather than 2^32-1 (specified in spec)
|
||||
"rv64d/I-FSD-01", "2000",
|
||||
"rv64d/I-FLD-01", "2420",
|
||||
"rv64d/I-FNMADD-D-01", "2000",
|
||||
@ -134,16 +144,6 @@ string tests32f[] = '{
|
||||
"rv64d/I-FEQ-D-01", "2000",
|
||||
"rv64d/I-FADD-D-01", "2000",
|
||||
"rv64d/I-FCLASS-D-01", "2000",
|
||||
// "rv64d/I-FCVT-D-L-01", "2000",
|
||||
// "rv64d/I-FCVT-D-LU-01", "2000",
|
||||
// "rv64d/I-FCVT-D-S-01", "2000",
|
||||
// "rv64d/I-FCVT-D-W-01", "2000",
|
||||
// "rv64d/I-FCVT-D-WU-01", "2000",
|
||||
// "rv64d/I-FCVT-L-D-01", "2000",
|
||||
// "rv64d/I-FCVT-LU-D-01", "2000",
|
||||
// "rv64d/I-FCVT-S-D-01", "2000",
|
||||
// "rv64d/I-FCVT-W-D-01", "2000",
|
||||
// "rv64d/I-FCVT-WU-D-01", "2000",
|
||||
"rv64d/I-FMADD-D-01", "2000",
|
||||
"rv64d/I-FMUL-D-01", "2000",
|
||||
"rv64d/I-FMV-D-X-01", "2000",
|
||||
@ -898,8 +898,22 @@ module instrNameDecTB(
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
|
||||
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
|
||||
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
|
||||
else if (funct7 == 7'b1110000 && rs2 == 5'b00000) name = "FMV.X.W";
|
||||
else if (funct7 == 7'b1111000 && rs2 == 5'b00000) name = "FMV.W.X";
|
||||
else if (funct7 == 7'b1110001 && rs2 == 5'b00000) name = "FMV.X.D"; // DOUBLE
|
||||
@ -915,22 +929,50 @@ module instrNameDecTB(
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
|
||||
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
|
||||
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
|
||||
else if (funct7[6:2] == 5'b00100) name = "FSGNJN";
|
||||
else if (funct7[6:2] == 5'b00101) name = "FMAX";
|
||||
else if (funct7[6:2] == 5'b10100) name = "FLT";
|
||||
else if (funct7[6:2] == 5'b11100) name = "FCLASS";
|
||||
else name = "ILLEGAL";
|
||||
10'b0101111_010: if (funct7[6:2] == 5'b00000) name = "FADD";
|
||||
10'b1010011_010: if (funct7[6:2] == 5'b00000) name = "FADD";
|
||||
else if (funct7[6:2] == 5'b00001) name = "FSUB";
|
||||
else if (funct7[6:2] == 5'b00010) name = "FMUL";
|
||||
else if (funct7[6:2] == 5'b00011) name = "FDIV";
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
|
||||
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
|
||||
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
|
||||
else if (funct7[6:2] == 5'b00100) name = "FSGNJX";
|
||||
else if (funct7[6:2] == 5'b10100) name = "FEQ";
|
||||
else name = "ILLEGAL";
|
||||
@ -941,8 +983,22 @@ module instrNameDecTB(
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
|
||||
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
|
||||
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
|
||||
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
|
||||
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
|
||||
else name = "ILLEGAL";
|
||||
10'b0000111_010: name = "FLW";
|
||||
10'b0100111_010: name = "FSW";
|
||||
|
Loading…
Reference in New Issue
Block a user