Almost all convert instructions pass Imperas tests

This commit is contained in:
Katherine Parry 2021-07-11 18:06:33 -04:00
parent 20f2a4e47c
commit 36f59f3c99
32 changed files with 948 additions and 5726 deletions

View File

@ -46,7 +46,7 @@
`define MEM_DCACHE 0
`define MEM_DTIM 1
`define MEM_ICACHE 0
`define MEM_VIRTMEM 0\1
`define MEM_VIRTMEM 1
`define VECTORED_INTERRUPTS_SUPPORTED 1
`define ITLB_ENTRIES 32
@ -56,10 +56,7 @@
`define PMP_ENTRIES 16
// Address space
`define RESET_VECTOR 64'h0000000080000000
// Bus Interface width
`define AHBW 64
`define RESET_VECTOR 64'h80000000
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
@ -84,6 +81,9 @@
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width
`define AHBW 64
// Test modes
// Tie GPIO outputs back to inputs

View File

@ -8,7 +8,7 @@ add wave /testbench/clk
add wave /testbench/reset
add wave -divider
#add wave /testbench/dut/hart/ebu/IReadF
add wave /testbench/dut/hart/DataStall
#add wave /testbench/dut/hart/DataStall
add wave /testbench/dut/hart/ICacheStallF
add wave /testbench/dut/hart/StallF
add wave /testbench/dut/hart/StallD

View File

@ -1,65 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
//
// Block Name: add.v
// Author: David Harris
// Date: 11/12/1995
//
// Block Description:
// This block performs the addition of the product and addend. It also
// contains logic necessary to adjust the signs for effective subtracts
// and negative results.
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Compound adder of the FMA: adds the (optionally zeroed) carry-save product
// rM+sM to the (optionally inverted) aligned addend tM, produces both the +0
// and +1 results, and returns the selected one, negated on request.
module add(
  input  logic [105:0] rM,         // partial product 1
  input  logic [105:0] sM,         // partial product 2
  input  logic [163:0] tM,         // aligned addend
  output logic [163:0] sum,        // selected sum, negated when negsum is set
  input  logic         negsum,     // negate the selected sum
  input  logic         invz,       // invert the addend before adding
  input  logic         selsum1,    // select the +1 result instead of the +0 result
  output logic         negsum0,    // bit 164 of the +0 result
  output logic         negsum1,    // bit 164 of the +1 result
  input  logic         killprodM   // zero out both partial products
);
  logic [105:0] gatedR, gatedS;    // partial products after the kill gate
  logic [164:0] addend;            // zero-extended addend, inverted when invz
  logic [164:0] product;           // product sum placed at bits [107:2]
  logic [164:0] plus0, plus1;      // compound adder results
  logic [163:0] chosen;            // result picked by selsum1, before negation

  // Kill gate: both partial products are forced to zero together.
  assign gatedR = rM & {106{~killprodM}};
  assign gatedS = sM & {106{~killprodM}};

  // Conditional one's-complement of the addend; the matching +1 that completes
  // the two's complement is supplied by the plus1 result below.
  assign addend = {1'b0, tM} ^ {165{invz}};

  // The 106-bit product sum is positioned two bits above the LSB of the addend.
  assign product = {57'b0, gatedR + gatedS, 2'b0};

  assign plus0 = addend + product;
  assign plus1 = addend + product + 165'd1;

  // Sign of each candidate result.
  assign negsum0 = plus0[164];
  assign negsum1 = plus1[164];

  // Pick the +0/+1 result, then negate it if requested.
  assign chosen = selsum1 ? plus1[163:0] : plus0[163:0];
  assign sum    = negsum ? -chosen : chosen;
endmodule

View File

@ -1,88 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: align.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the alignment shifter. It is responsible for
// adjusting the fraction portion of the addend relative to the fraction
// produced in the multiplier array.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Alignment shifter: right-shifts the addend mantissa {leading bit, zman} by
// an amount derived from aligncntE, and reports the shift amount, the bits
// shifted out (sticky), and whether the product should be killed.
//
// Changes relative to the previous version (outputs are unchanged):
//  * tmp is now assigned on every path through the always_comb block; it used
//    to be written in only two of the four branches, which infers a latch.
//  * The two middle branches (d <= 2 and 2 < d <= 55) were identical and are
//    merged; the shifter itself is written once after the range decode.
module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
             killprodE, sumshiftE, sumshiftzeroE);

  input  logic [51:0]  zman;           // fraction of addend z
  input  logic [12:0]  aligncntE;      // alignment count d, compared as signed
  input  logic         xzeroE;         // input X = 0
  input  logic         yzeroE;         // input Y = 0
  input  logic         zzeroE;         // input Z = 0 (forces tE to zero)
  input  logic         zdenormE;       // input Z is denormalized (leading bit 0)
  output logic [163:0] tE;             // aligned addend
  output logic         bsE;            // sticky bit: OR of the bits shifted past tE
  output logic         killprodE;      // product is zero or Z >> product
  output logic [8:0]   sumshiftE;      // right-shift amount applied to the addend
  output logic         sumshiftzeroE;  // set only in the saturated d > 55 case

  // Internal nodes
  logic [215:0] shift;                 // shifter output: 164 result bits + 52 sticky bits
  logic [12:0]  tmp;                   // 57 - d, the unsaturated shift amount
  logic         leadbit;               // bit placed above zman before shifting

  always_comb begin
    // Defaults shared by every range.
    tmp           = 13'd57 - aligncntE;
    killprodE     = xzeroE | yzeroE;
    sumshiftzeroE = 1'b0;
    leadbit       = ~zdenormE;

    if ($signed(aligncntE) <= $signed(-(13'd105))) begin
      // d <= -105: product-anchored, shift saturates at 163 (3p+4 with p = 53).
      // This range always inserts a leading 1, regardless of zdenormE.
      sumshiftE = 9'd163;
      leadbit   = 1'b1;
    end else if ($signed(aligncntE) <= $signed(13'd55)) begin
      // -105 < d <= 55: shift by 57 - d (range 2..161, fits in 9 bits).
      sumshiftE = tmp[8:0];
    end else begin
      // d > 55: addend-anchored, no shift; the product is discarded.
      sumshiftE     = 9'd0;
      sumshiftzeroE = 1'b1;
      killprodE     = 1'b1;
    end

    // Single shared shifter; the low 52 bits collect what falls off the end.
    shift = {leadbit, zman, 163'b0} >> sumshiftE;
    tE    = zzeroE ? 164'b0 : shift[215:52];
    bsE   = |shift[51:0];
  end
endmodule

View File

@ -1,53 +0,0 @@
// Booth selector for one 3-bit group: produces the partial-product row pp
// (0, +x, +2x, or the bitwise complement of x / 2x), a flag e set for the
// complemented selections, and the add1 correction value for that row.
module booth(
  input  logic [53:0] xExt,    // multiplicand
  input  logic [2:0]  choose,  // 3-bit group that selects the encoding
  output logic [1:0]  add1,    // correction value paired with a complemented row
  output logic        e,       // 1 for the complemented selections (choose[2] set)
  output logic [54:0] pp       // resulting partial-product row
);
  // All three outputs are decoded from the same selector, so they share one case.
  always_comb
    case (choose)
      3'b000: begin            //  0
        pp   = 55'b0;
        e    = 0;
        add1 = 2'b0;
      end
      3'b001, 3'b010: begin    // +1
        pp   = {1'b0, xExt};
        e    = 0;
        add1 = 2'b0;
      end
      3'b011: begin            // +2
        pp   = {xExt, 1'b0};
        e    = 0;
        add1 = 2'b0;
      end
      3'b100: begin            // -2: complemented and shifted, correction of 2
        pp   = {~xExt, 1'b0};
        e    = 1;
        add1 = 2'b10;
      end
      3'b101, 3'b110: begin    // -1: complemented, correction of 1
        pp   = {1'b1, ~xExt};
        e    = 1;
        add1 = 2'b1;
      end
      3'b111: begin            // -0: all ones plus a correction of 1
        pp   = '1;
        e    = 1;
        add1 = 2'b1;
      end
    endcase
endmodule

View File

@ -1,90 +0,0 @@
// BITS-wide 3:2 compressor: one independent sng3comp2 cell per bit position.
// No carry is passed between columns; carry[i] is the carry produced by column i.
module add3comp2 #(parameter BITS = 4) (
  input  logic [BITS-1:0] a, b, c,   // the three operands to compress
  output logic [BITS-1:0] carry,     // per-column carry outputs
  output logic [BITS-1:0] sum        // per-column sum outputs
);
  // One compressor cell per column (block left unnamed so instance paths are unchanged).
  for (genvar col = 0; col < BITS; col = col + 1) begin
    sng3comp2 add0(.a(a[col]), .b(b[col]), .c(c[col]),
                   .carry(carry[col]), .sum(sum[col]));
  end
endmodule
// BITS-wide 4:2 compressor. Reduces four operands to a sum vector and a carry
// vector such that   a + b + c + d == sum + 2*carry.
//
// Each column is two chained 3:2 cells: the first compresses a, b, c and sends
// its carry sideways to the next column; the second compresses that result
// with d and the sideways carry arriving from the previous column.
//
// Fix: the two carries leaving the top column (the second-stage carry and the
// sideways carry) have the same weight, so their XOR belongs in carry[BITS-1]
// and their AND in carry[BITS]. The previous version had these two swapped,
// which broke the identity above (e.g. a = b = 4'b1000, c = d = 0 evaluated
// to 32 instead of 16).
// The column logic is written inline so the block no longer needs BITS >= 2.
module add4comp2(a, b, c, d, carry, sum);

  parameter BITS = 4;
  input  logic [BITS-1:0] a;
  input  logic [BITS-1:0] b;
  input  logic [BITS-1:0] c;
  input  logic [BITS-1:0] d;
  output logic [BITS:0]   carry;   // carry vector, one bit wider than the operands
  output logic [BITS-1:0] sum;     // sum vector

  logic [BITS:0]   ripple;         // ripple[i] = sideways carry entering column i
  logic [BITS-1:0] stage1;         // a ^ b ^ c per column
  logic [BITS-1:0] stage2carry;    // carry out of the second 3:2 cell per column

  always_comb begin
    ripple[0] = 1'b0;              // nothing enters the lowest column
    for (int i = 0; i < BITS; i = i + 1) begin
      // First 3:2 cell: a, b, c
      stage1[i]      = a[i] ^ b[i] ^ c[i];
      ripple[i+1]    = (a[i] ^ b[i]) ? c[i] : a[i];
      // Second 3:2 cell: stage1, d, incoming sideways carry
      sum[i]         = stage1[i] ^ d[i] ^ ripple[i];
      stage2carry[i] = (stage1[i] ^ d[i]) ? ripple[i] : stage1[i];
    end

    // Lower columns pass their second-stage carry straight through.
    carry = '0;
    for (int i = 0; i < BITS-1; i = i + 1)
      carry[i] = stage2carry[i];

    // Top column: add the two equal-weight carries with a half adder.
    carry[BITS-1] = stage2carry[BITS-1] ^ ripple[BITS];
    carry[BITS]   = stage2carry[BITS-1] & ripple[BITS];
  end
endmodule
// Single-bit 3:2 compressor (full adder): a + b + c == sum + 2*carry.
module sng3comp2(
  input  logic a,
  input  logic b,
  input  logic c,
  output logic carry,  // majority of a, b, c
  output logic sum     // parity of a, b, c
);
  logic differ;        // a and b disagree

  assign differ = a ^ b;
  // When a and b differ, exactly one of them is 1, so c decides the carry;
  // when they agree, the carry equals their common value.
  assign carry  = differ ? c : a;
  assign sum    = differ ^ c;
endmodule
// Single-bit 4:2 compressor built from two chained 3:2 cells.
// The first cell compresses a, b, c; its carry leaves as cout.
// The second cell compresses that partial sum with d and cin, and drives
// the carry and sum outputs.
module sng4comp2(
  input  logic a,
  input  logic b,
  input  logic c,
  input  logic d,
  input  logic cin,    // carry arriving from the neighbouring column
  output logic cout,   // carry of the first cell, sent to the neighbouring column
  output logic carry,  // carry of the second cell
  output logic sum     // final sum bit
);
  logic partial;       // sum bit of the first cell

  sng3comp2 add1(.a(a),       .b(b), .c(c),   .carry(cout),  .sum(partial));
  sng3comp2 add2(.a(partial), .b(d), .c(cin), .carry(carry), .sum(sum));
endmodule

View File

@ -1,140 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the exponent path of the FMAC. It performs the
// following operations:
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Exponent path of the FMA. From the operand exponents it derives the product
// exponent and the alignment count; from the externally normalized exponent
// de0 it derives the overflow/underflow indications and the result exponent,
// with a mux for special results (NaN, infinity, overflow, underflow).
// killprod, normcnt, proddenorm and zexpsel are part of the interface but are
// not read by this block.
module expgen(
  input  logic [62:52] xexp,          // exponent of multiplicand x
  input  logic [62:52] yexp,          // exponent of multiplicand y
  input  logic [62:52] zexp,          // exponent of addend z
  input  logic         killprod,      // Z >> product (unused here)
  input  logic         sumzero,       // sum exactly equals zero
  input  logic         resultdenorm,  // result is denormalized
  input  logic [8:0]   normcnt,       // normalization shift count (unused here)
  input  logic         infinity,      // on overflow, produce infinity rather than max finite
  input  logic [4:0]   FmaFlagsM,     // flags; bits 4, 2 and 1 steer the special mux
  input  logic         inf,           // infinity indication from the flag logic
  input  logic         xzero,         // x is zero
  input  logic         yzero,         // y is zero
  input  logic         expplus1,      // add one to the result exponent
  input  logic         nan,           // some input is NaN
  input  logic [12:0]  de0,           // normalized exponent, computed outside this block
  input  logic         xnan,          // x is NaN
  input  logic         ynan,          // y is NaN
  input  logic         znan,          // z is NaN
  input  logic         xdenorm,       // x is denormalized
  input  logic         ydenorm,       // y is denormalized
  input  logic         zdenorm,       // z is denormalized
  input  logic         proddenorm,    // product is denormalized (unused here)
  input  logic         specialsel,    // select the special-case exponent
  input  logic         zexpsel,       // (unused here)
  output logic [12:0]  aligncnt,      // shift count for the alignment shifter
  output logic [62:52] wexp,          // exponent of the result
  output logic         prodof,        // product exponent in 2047..4095
  output logic         sumof,         // result exponent in 2047..4095
  output logic         sumuf,         // result exponent is zero without being a zero/denorm result
  output logic         denorm0,       // de0 is exactly zero
  output logic [12:0]  ae             // exponent of the product
);
  localparam logic [10:0] EXP_ALL_ONES   = 11'b11111111111;
  localparam logic [10:0] EXP_MAX_FINITE = 11'b11111111110;

  logic        prodzero;      // either multiplicand is zero
  logic [12:0] denormadj;     // alignment correction for denormalized operands
  logic [12:0] normexp;       // de0, forced to zero for zero/denormal results
  logic [10:0] ovfexp;        // exponent used on overflow
  logic [10:0] nanexp;        // exponent taken from the first NaN input
  logic [10:0] specexp;       // exponent chosen by the special-case mux

  // Product exponent: xexp + yexp - bias, or zero when the product is zero.
  assign prodzero = xzero | yzero;
  assign ae       = prodzero ? 13'd0 : {2'b0, xexp} + {2'b0, yexp} - 13'd1023;
  assign prodof   = ~ae[12] && (ae > 13'd2046);

  // Alignment count: zexp - ae, corrected by one for each normalized
  // multiplicand and back by one for a normalized addend (all modulo 2^13).
  assign denormadj = {12'b0, ~xdenorm} + {12'b0, ~ydenorm} - {12'b0, ~zdenorm} - 13'd1;
  assign aligncnt  = {2'b0, zexp} - ae + denormadj;

  // Normalized exponent checks.
  assign denorm0 = (de0 == 13'd0);
  assign normexp = (resultdenorm | sumzero) ? 13'd0 : de0;
  assign sumof   = ~normexp[12] && (normexp > 13'd2046);
  // normexp equals de0 whenever neither sumzero nor resultdenorm is set.
  assign sumuf   = denorm0 && ~sumzero && ~resultdenorm;

  // Special-case exponent, in priority order: NaN/invalid, overflow,
  // infinity, underflow; otherwise don't-care.
  assign ovfexp  = infinity ? EXP_ALL_ONES : EXP_MAX_FINITE;
  assign nanexp  = xnan ? xexp :
                   ynan ? yexp :
                   znan ? zexp : EXP_ALL_ONES;
  assign specexp = (FmaFlagsM[4] | nan) ? nanexp :
                   FmaFlagsM[2]         ? ovfexp :
                   inf                  ? EXP_ALL_ONES :
                   FmaFlagsM[1]         ? 11'b0 : 11'bx;

  // Final exponent: special result, or the normalized exponent plus expplus1.
  assign wexp = specialsel ? specexp : normexp[10:0] + {10'b0, expplus1};
endmodule

View File

@ -1,90 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the exponent path of the FMAC. It performs the
// following operations:
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// First half of the FMA exponent path: computes the product exponent, flags a
// product exponent in the range 2047..4095, and produces the alignment count
// for the addend shifter.
module expgen1(
  input  logic [62:52] xexp,       // exponent of multiplicand x
  input  logic [62:52] yexp,       // exponent of multiplicand y
  input  logic [62:52] zexp,       // exponent of addend z
  input  logic         xzeroE,     // x is zero
  input  logic         yzeroE,     // y is zero
  input  logic         xdenormE,   // x is denormalized
  input  logic         ydenormE,   // y is denormalized
  input  logic         zdenormE,   // z is denormalized
  output logic [12:0]  aligncntE,  // shift count for the alignment shifter
  output logic         prodof,     // product exponent out of bounds (high side)
  output logic [12:0]  aeE         // exponent of the product
);
  logic        prodzero;   // either multiplicand is zero
  logic [12:0] biased;     // xexp + yexp - 1023, modulo 2^13
  logic [12:0] denormadj;  // correction for denormalized operands

  // Product exponent, forced to zero when the product itself is zero.
  assign prodzero = xzeroE | yzeroE;
  assign biased   = {2'b0, xexp} + {2'b0, yexp} - 13'd1023;
  assign aeE      = prodzero ? 13'd0 : biased;

  // Bit 12 set means the 13-bit value wrapped negative, so it is not an overflow.
  assign prodof = ~aeE[12] && (aeE > 13'd2046);

  // zexp - aeE, plus one per normalized multiplicand, minus one for a
  // normalized addend, minus one (all modulo 2^13).
  assign denormadj = {12'b0, ~xdenormE} + {12'b0, ~ydenormE} - {12'b0, ~zdenormE} - 13'd1;
  assign aligncntE = {2'b0, zexp} - aeE + denormadj;
endmodule

View File

@ -1,108 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: expgen.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements the exponent path of the FMAC. It performs the
// following operations:
//
// 1) Compute exponent of multiply.
// 2) Compare multiply and add exponents to generate alignment shift count
// 3) Adjust exponent based on normalization
// 4) Increment exponent based on postrounding renormalization
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Second half of the FMA exponent path: checks the normalized exponent de0 for
// overflow/underflow and produces the result exponent, with a mux for special
// results (NaN, infinity, overflow, underflow).
module expgen2(
  input  logic [62:52] xexp,          // exponent of multiplicand x
  input  logic [62:52] yexp,          // exponent of multiplicand y
  input  logic [62:52] zexp,          // exponent of addend z
  input  logic         sumzero,       // sum exactly equals zero
  input  logic         resultdenorm,  // result is denormalized
  input  logic         infinity,      // on overflow, produce infinity rather than max finite
  input  logic [4:0]   FmaFlagsM,     // flags; bits 4, 2 and 1 steer the special mux
  input  logic         inf,           // infinity indication from the flag logic
  input  logic         expplus1,      // add one to the result exponent
  input  logic         nanM,          // some input is NaN
  input  logic [12:0]  de0,           // normalized exponent, computed outside this block
  input  logic         xnanM,         // x is NaN
  input  logic         ynanM,         // y is NaN
  input  logic         znanM,         // z is NaN
  input  logic         specialsel,    // select the special-case exponent
  output logic [62:52] wexp,          // exponent of the result
  output logic         sumof,         // result exponent in 2047..4095
  output logic         sumuf          // exponent is zero without a zero/denorm result
);
  localparam logic [10:0] EXP_ALL_ONES   = 11'b11111111111;
  localparam logic [10:0] EXP_MAX_FINITE = 11'b11111111110;

  logic        forcezero;  // result exponent is forced to zero
  logic [12:0] normexp;    // de0 after the zero/denormal override
  logic [10:0] ovfexp;     // exponent used on overflow
  logic [10:0] nanexp;     // exponent taken from the first NaN input
  logic [10:0] specexp;    // exponent chosen by the special-case mux

  // Out-of-bounds checks on the normalized exponent.
  assign forcezero = resultdenorm | sumzero;
  assign normexp   = forcezero ? 13'd0 : de0;
  assign sumof     = ~normexp[12] && (normexp > 13'd2046);
  assign sumuf     = (normexp == 13'd0) && ~sumzero && ~resultdenorm;

  // IEEE 754-2008 section 6.2.3 lets the result NaN take its payload from any
  // NaN input; the priority here is x, then y, then z.
  assign nanexp = xnanM ? xexp :
                  ynanM ? yexp :
                  znanM ? zexp : EXP_ALL_ONES;

  // Overflow gives infinity or the largest finite exponent.
  assign ovfexp = infinity ? EXP_ALL_ONES : EXP_MAX_FINITE;

  // Priority: NaN/invalid, overflow, infinity, underflow; otherwise don't-care.
  assign specexp = (FmaFlagsM[4] | nanM) ? nanexp :
                   FmaFlagsM[2]          ? ovfexp :
                   inf                   ? EXP_ALL_ONES :
                   FmaFlagsM[1]          ? 11'b0 : 11'bx;

  // Final exponent: special result, or the normalized exponent plus expplus1.
  assign wexp = specialsel ? specexp : normexp[10:0] + {10'b0, expplus1};
endmodule

View File

@ -1,88 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the flags: invalid, overflow, underflow, inexact.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Flag generation for the FMA. FmaFlagsM is {invalid, divide-by-zero,
// overflow, underflow, inexact}; inf and nan summarize the special operands.
//
// Fix: the inf output includes suminf (a result that overflowed), and the
// overflow flag was computed as suminf && ~inf, which is identically zero.
// Inexact was masked the same way. Overflow and inexact are now gated on the
// *input* infinities only, so an overflowing finite computation raises both.
// The inf output itself is unchanged.
module flag(xnan, ynan, znan, xinf, yinf, zinf, prodof, sumof, sumuf,
            psign, zsign, xzero, yzero, zzero, vbits, killprod,
            inf, nan, FmaFlagsM, sticky);

  input        xnan;      // X is NaN
  input        ynan;      // Y is NaN
  input        znan;      // Z is NaN
  input        sticky;    // sticky bit; contributes to inexact
  input        xinf;      // X is Inf
  input        yinf;      // Y is Inf
  input        zinf;      // Z is Inf
  input        prodof;    // X*Y exponent out of bounds
  input        sumof;     // X*Y + Z exponent out of bounds (overflow)
  input        sumuf;     // X*Y + Z exponent underflows
  input        psign;     // sign of product
  input        zsign;     // sign of Z
  input        xzero;     // X = 0
  input        yzero;     // Y = 0
  input        zzero;     // Z = 0
  input        killprod;  // product was discarded during alignment
  input  [1:0] vbits;     // R and S bits of result
  output       inf;       // some source is Inf, or the result overflowed
  output       nan;       // some source is NaN
  output [4:0] FmaFlagsM; // {invalid, div-by-zero, overflow, underflow, inexact}

  // Internal nodes
  wire inputinf;          // at least one operand is infinite
  wire prodinf;           // product exponent out of bounds with non-NaN multiplicands
  wire suminf;            // result exponent out of bounds with no NaN operand

  // If any input is NaN, propagate the NaN.
  assign nan = xnan || ynan || znan;

  // Infinity checks.
  assign inputinf = xinf || yinf || zinf;
  assign prodinf  = prodof && ~xnan && ~ynan;
  assign suminf   = sumof && ~xnan && ~ynan && ~znan;
  assign inf      = inputinf || suminf;

  // Invalid: Inf - Inf, or 0 * Inf.
  // NOTE(review): prodinf is included in the Inf - Inf term, so a finite
  // product with an out-of-range exponent plus an opposite-signed infinite Z
  // also raises invalid; confirm that is intended.
  assign FmaFlagsM[4] = (xinf || yinf || prodinf) && zinf && (psign ^ zsign) ||
                        xzero && yinf || yzero && xinf;

  // Divide by zero never occurs in a multiply-add.
  assign FmaFlagsM[3] = 0;

  // Overflow: the result exponent is out of bounds and no operand was already
  // infinite (an exact infinity is not an overflow).
  assign FmaFlagsM[2] = suminf && ~inputinf;

  // Underflow: exponent underflowed on an otherwise ordinary result, or a
  // non-zero product was discarded against a zero addend.
  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinf && ~nan) ||
                        (killprod & zzero & ~(yzero | xzero));

  // Inexact: any of the round/sticky bits is set, or the result overflowed;
  // never for results that come from an infinite or NaN operand.
  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) && ~(inputinf || nan);
endmodule

View File

@ -1,34 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the flags: invalid, overflow, underflow, inexact.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Early flag logic: reports whether any operand is NaN and whether the
// product exponent is out of bounds with neither multiplicand being NaN.
module flag1(
  input  logic xnanE,     // X is NaN
  input  logic ynanE,     // Y is NaN
  input  logic znanE,     // Z is NaN
  input  logic prodof,    // X*Y exponent out of bounds
  output logic prodinfE,  // product out of bounds and neither multiplicand is NaN
  output logic nanE       // some source is NaN
);
  logic multnan;          // either multiplicand is NaN

  assign multnan  = xnanE || ynanE;
  assign nanE     = multnan || znanE;
  assign prodinfE = prodof && !multnan;
endmodule

View File

@ -1,80 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: flag.v
// Author: David Harris
// Date: 12/6/1995
//
// Block Description:
// This block generates the flags: invalid, overflow, underflow, inexact.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Late flag logic for the FMA. FmaFlagsM is {invalid, divide-by-zero,
// overflow, underflow, inexact}; inf reports an infinite operand or an
// overflowed result.
//
// Fix: inf includes suminf (a result that overflowed), and the overflow flag
// was computed as suminf && ~inf, which is identically zero. Inexact was
// masked the same way. Overflow and inexact are now gated on the *input*
// infinities only, so an overflowing finite computation raises both.
// The inf output itself is unchanged.
module flag2(xsign, ysign, zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof, sumuf,
             xzeroM, yzeroM, zzeroM, vbits, killprodM,
             inf, nanM, FmaFlagsM, sticky, prodinfM);

  input  logic       xnanM;      // X is NaN
  input  logic       ynanM;      // Y is NaN
  input  logic       znanM;      // Z is NaN
  input  logic       xsign;      // sign of X
  input  logic       ysign;      // sign of Y
  input  logic       zsign;      // sign of Z
  input  logic       sticky;     // sticky bit; contributes to inexact
  input  logic       prodinfM;   // product exponent out of bounds, multiplicands not NaN
  input  logic       xinfM;      // X is Inf
  input  logic       yinfM;      // Y is Inf
  input  logic       zinfM;      // Z is Inf
  input  logic       sumof;      // X*Y + Z exponent out of bounds (overflow)
  input  logic       sumuf;      // X*Y + Z exponent underflows
  input  logic       xzeroM;     // X = 0
  input  logic       yzeroM;     // Y = 0
  input  logic       zzeroM;     // Z = 0
  input  logic       killprodM;  // product was discarded during alignment
  input  logic [1:0] vbits;      // R and S bits of result
  output logic       inf;        // some source is Inf, or the result overflowed
  input  logic       nanM;       // some source is NaN
  output logic [4:0] FmaFlagsM;  // {invalid, div-by-zero, overflow, underflow, inexact}

  // Internal nodes
  logic inputinf;                // at least one operand is infinite
  logic suminf;                  // result exponent out of bounds with no NaN operand

  assign inputinf = xinfM || yinfM || zinfM;
  assign suminf   = sumof && ~xnanM && ~ynanM && ~znanM;
  assign inf      = inputinf || suminf;

  // Invalid: Inf - Inf (product sign xsign^ysign differs from zsign), or 0 * Inf.
  // NOTE(review): prodinfM is included in the Inf - Inf term, so a finite
  // product with an out-of-range exponent plus an opposite-signed infinite Z
  // also raises invalid; confirm that is intended.
  assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
                        xzeroM && yinfM || yzeroM && xinfM;

  // Divide by zero never occurs in a multiply-add.
  assign FmaFlagsM[3] = 0;

  // Overflow: the result exponent is out of bounds and no operand was already
  // infinite (an exact infinity is not an overflow).
  assign FmaFlagsM[2] = suminf && ~inputinf;

  // Underflow: exponent underflowed on an otherwise ordinary result, or a
  // non-zero product was discarded against a zero addend.
  assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) ||
                        (killprodM & zzeroM & ~(yzeroM | xzeroM));

  // Inexact: any of the round/sticky bits is set, or the result overflowed;
  // never for results that come from an infinite or NaN operand.
  assign FmaFlagsM[0] = (vbits[0] || vbits[1] || sticky || suminf) && ~(inputinf || nanM);
endmodule

View File

@ -1,132 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// Block Name: fmac.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This is the top level block of a floating-point multiply/accumulate
// unit(FMAC). It instantiates the following sub-blocks:
//
// array Booth encoding, partial product generation, product summation
// expgen Exponent summation, compare, and adjust
// align Alignment shifter
// add Carry-save adder for accumulate, carry propagate adder
// lza Leading zero anticipator to control normalization shifter
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to ReadData1E or ReadData3E inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Top-level wrapper of the floating-point multiply-accumulate unit. It slices
// the three 64-bit operands into sign / exponent / fraction fields and wires
// them into the fraction datapath, exponent datapath, special-case detection
// and control sub-blocks instantiated below.
//
// Almost every connection is implicit (.*): each sub-block port binds to the
// local signal of the same name, so the declarations below must keep exactly
// these names. Only the operand slices and the FmaResultM fields are connected
// by explicit name.
//
// NOTE(review): the align and add definitions visible alongside this module
// use E/M-suffixed port names (aligncntE, tE, rM, sM, killprodM, ...) that
// have no same-named signal here; confirm which versions of those blocks this
// wrapper is meant to be compiled against.
module fma(ReadData1E, ReadData2E, ReadData3E, FrmE,
FmaResultM, FmaFlagsM, aligncnt);
/////////////////////////////////////////////////////////////////////////////
input [63:0] ReadData1E; // input 1 (multiplicand x)
input [63:0] ReadData2E; // input 2 (multiplicand y)
input [63:0] ReadData3E; // input 3 (addend z)
input [2:0] FrmE; // Rounding mode
output [63:0] FmaResultM; // output FmaResultM=ReadData1E*ReadData2E+ReadData3E
output [4:0] FmaFlagsM; // status flags
output [12:0] aligncnt; // alignment shift count, exported as a port (presumably for debug -- confirm)
// Internal nodes
logic [105:0] r; // one result of partial product sum
logic [105:0] s; // other result of partial products
logic [163:0] t; // output of alignment shifter
logic [163:0] sum; // output of carry prop adder
logic [53:0] v; // normalized sum, R, S bits
// logic [12:0] aligncnt; // shift count for alignment (now a module output, see above)
logic [8:0] normcnt; // shift count for normalizer
logic [12:0] ae; // multiplier exponent
logic bs; // sticky bit of addend
logic ps; // sticky bit of product
logic killprod; // ReadData3E >> product
logic negsum; // negate sum
logic invz; // invert addend
logic selsum1; // select +1 mode of sum
logic negsum0; // sum +0 < 0
logic negsum1; // sum +1 < 0
logic sumzero; // sum = 0
logic infinity; // generate infinity on overflow
logic prodof; // ReadData1E*ReadData2E out of range
logic sumof; // result out of range
// Operand classification and control signals. These are bound to sub-block
// ports purely by name through .*, so their producers and consumers are not
// visible in this module.
logic xzero;
logic yzero;
logic zzero;
logic xdenorm;
logic ydenorm;
logic zdenorm;
logic proddenorm;
logic zexpsel;
logic denorm0;
logic resultdenorm;
logic inf;
logic xinf;
logic yinf;
logic zinf;
logic xnan;
logic ynan;
logic znan;
logic specialsel;
logic expplus1;
logic nan;
logic sumuf;
logic psign;
logic sticky;
logic [8:0] sumshift;
logic sumshiftzero;
logic [12:0] de0;
logic isAdd;
// isAdd is tied high; whichever sub-block has an isAdd port picks it up via .*
assign isAdd = 1;
// Instantiate fraction datapath
// Fractions are bits [51:0] of each operand; everything else binds by name.
multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*);
align align(.zman(ReadData3E[51:0]),.*);
add add(.*);
lza lza(.*);
normalize normalize(.zexp(ReadData3E[62:52]),.*);
// round drives the result fraction FmaResultM[51:0].
// NOTE(review): both round and sign connect a wsign port to FmaResultM[63];
// confirm that only one of them drives it.
round round(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]),.zman(ReadData3E[51:0]), .wman(FmaResultM[51:0]),.wsign(FmaResultM[63]),.*);
// Instantiate exponent datapath
// Exponents are bits [62:52] of each operand; expgen drives FmaResultM[62:52].
expgen expgen(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.wexp(FmaResultM[62:52]),.*);
// Instantiate special case detection across datapath & exponent path
special special(.*);
// Instantiate control logic
// Signs are bit 63 of each operand; flag receives the low two bits of v as its R/S bits.
sign sign(.xsign(ReadData1E[63]),.ysign(ReadData2E[63]),.zsign(ReadData3E[63]),.wsign(FmaResultM[63]),.*);
flag flag(.zsign(ReadData3E[63]),.vbits(v[1:0]),.*);
endmodule

View File

@ -1,165 +0,0 @@
// fma1: first (execute) stage of the pipelined fused multiply-add.
//   - unpacks X, Y, Z for double (FmtE=1) or single (FmtE=0, held in the top
//     half of the 64-bit operand),
//   - classifies each operand (zero / denormal / infinity / NaN),
//   - computes the product mantissa and the biased product exponent,
//   - aligns the addend to the product and produces its sticky bit.
// All outputs are combinational functions of the inputs.
module fma1(
    input  logic [63:0]  FInput1E,       // X
    input  logic [63:0]  FInput2E,       // Y
    input  logic [63:0]  FInput3E,       // Z
    input  logic [2:0]   FOpCtrlE,       // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic         FmtE,           // precision 1 = double 0 = single
    output logic [105:0] ProdManE,       // 1.X frac * 1.Y frac
    output logic [161:0] AlignedAddendE, // Z aligned for addition
    output logic [12:0]  ProdExpE,       // X exponent + Y exponent - bias
    output logic         AddendStickyE,  // sticky bit that is calculated during alignment
    output logic         KillProdE,      // set the product to zero before addition if the product is too small to matter
    output logic         XZeroE, YZeroE, ZZeroE, // inputs are zero
    output logic         XInfE, YInfE, ZInfE,    // inputs are infinity
    output logic         XNaNE, YNaNE, ZNaNE);   // inputs are NaN

    logic [51:0]  XFrac, YFrac, ZFrac;            // input fraction
    logic [52:0]  XMan, YMan, ZMan;               // input mantissa (with leading one)
    logic [12:0]  XExp, YExp, ZExp;               // input exponents
    logic         XSgn, YSgn, ZSgn;               // input signs
    logic [12:0]  AlignCnt;                       // how far to shift the addend to align with the product
    logic [211:0] Shift;                          // output of the alignment shifter including sticky bits
    logic         XDenormE, YDenormE, ZDenormE;   // inputs are denormal
    logic [63:0]  FInput3E2;                      // value to add (Z or zero)
    logic [12:0]  Bias;                           // 1023 for double, 127 for single
    logic         XExpZero, YExpZero, ZExpZero;   // input exponent zero
    logic         XFracZero, YFracZero, ZFracZero; // input fraction zero
    logic         XExpMax, YExpMax, ZExpMax;      // input exponent all 1s

    // Set addend to zero if FMUL instruction
    assign FInput3E2 = FOpCtrlE[2] ? 64'b0 : FInput3E;

    // Split inputs into the sign bit, fraction, and exponent and handle single or double precision
    //   - single precision is in the top half of the inputs
    assign XSgn = FInput1E[63];
    assign YSgn = FInput2E[63];
    assign ZSgn = FInput3E2[63];

    assign XExp = FmtE ? {2'b0, FInput1E[62:52]}  : {5'b0, FInput1E[62:55]};
    assign YExp = FmtE ? {2'b0, FInput2E[62:52]}  : {5'b0, FInput2E[62:55]};
    assign ZExp = FmtE ? {2'b0, FInput3E2[62:52]} : {5'b0, FInput3E2[62:55]};

    // Single-precision fractions are left-justified into the 52-bit field
    assign XFrac = FmtE ? FInput1E[51:0]  : {FInput1E[54:32], 29'b0};
    assign YFrac = FmtE ? FInput2E[51:0]  : {FInput2E[54:32], 29'b0};
    assign ZFrac = FmtE ? FInput3E2[51:0] : {FInput3E2[54:32], 29'b0};

    // The leading (implicit) one is present only when the exponent field is non-zero
    assign XMan = {~XExpZero, XFrac};
    assign YMan = {~YExpZero, YFrac};
    assign ZMan = {~ZExpZero, ZFrac};

    assign Bias = FmtE ? 13'h3ff : 13'h7f;

    // Determine if an input is a special value
    assign XExpZero = ~|XExp;
    assign YExpZero = ~|YExp;
    assign ZExpZero = ~|ZExp;

    assign XFracZero = ~|XFrac;
    assign YFracZero = ~|YFrac;
    assign ZFracZero = ~|ZFrac;

    assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
    assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];

    assign XNaNE = XExpMax & ~XFracZero;
    assign YNaNE = YExpMax & ~YFracZero;
    assign ZNaNE = ZExpMax & ~ZFracZero;

    assign XDenormE = XExpZero & ~XFracZero;
    assign YDenormE = YExpZero & ~YFracZero;
    assign ZDenormE = ZExpZero & ~ZFracZero;

    assign XInfE = XExpMax & XFracZero;
    assign YInfE = YExpMax & YFracZero;
    assign ZInfE = ZExpMax & ZFracZero;

    assign XZeroE = XExpZero & XFracZero;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;

    // Calculate the product's exponent
    //   - When multiplying two fp numbers, add the exponents
    //   - Subtract the bias (XExp + YExp has two biases, one from each exponent)
    //   - Denormal numbers have an exponent value of 1, however they are
    //     represented with an exponent of 0. Add one if there is a denormal number
    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
                      XExp + YExp - Bias + XDenormE + YDenormE;

    // Calculate the product's mantissa
    //   - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
    assign ProdManE = XMan * YMan;

    // Determine the shift count for alignment
    //   - negative means Z is larger, so shift Z left
    //   - positive means the product is larger, so shift Z right
    //   - Denormal numbers have an exponent value of 1, however they are
    //     represented with an exponent of 0. Add one to the exponent if it is a denormal number
    assign AlignCnt = ProdExpE - ZExp - ZDenormE;

    // Alignment shifter
    // Default addition without shifting
    //          | 55'b0 | 106'b(product) | 2'b0 |
    //          |1'b0| addend |
    // The 1'b0 before the addend is because the product's mantissa has two bits
    // before the binary point (xx.xxxxxxxxxx...)
    always_comb
        begin
            // Set default values. Shift gets a default too: it is only needed in
            // the two shifting branches, and leaving it unassigned in the others
            // would infer a latch inside always_comb.
            AddendStickyE = 0;
            KillProdE = 0;
            Shift = '0;

            // If the product is too small to affect the sum, kill the product
            //          | 55'b0 | 106'b(product) | 2'b0 |
            //  | addend |
            if ($signed(AlignCnt) <= $signed(-13'd56)) begin
                KillProdE = 1;
                AlignedAddendE = {107'b0, ZMan, 2'b0};
                AddendStickyE = ~(XZeroE|YZeroE);

            // If the addend is shifted left (negative AlignCnt)
            //          | 55'b0 | 106'b(product) | 2'b0 |
            //      | addend |
            end else if ($signed(AlignCnt) <= $signed(13'd0)) begin
                Shift = {55'b0, ZMan, 104'b0} << -AlignCnt;
                AlignedAddendE = Shift[211:50];
                AddendStickyE = |(Shift[49:0]);

            // If the addend is shifted right (positive AlignCnt)
            //          | 55'b0 | 106'b(product) | 2'b0 |
            //                          | addend |
            end else if ($signed(AlignCnt) <= $signed(13'd105)) begin
                Shift = {55'b0, ZMan, 104'b0} >> AlignCnt;
                AlignedAddendE = Shift[211:50];
                AddendStickyE = |(Shift[49:0]);

            // If the addend is too small to affect the addition
            //   - The addend has to shift two past the end of the product to be considered too small
            //   - The 2 extra bits are needed for rounding
            //          | 55'b0 | 106'b(product) | 2'b0 |
            //                                            | addend |
            end else begin
                AlignedAddendE = 162'b0;
                AddendStickyE = ~ZZeroE;
            end
        end
endmodule

View File

@ -1,282 +0,0 @@
// fma2: second (memory) stage of the pipelined fused multiply-add.
// Takes the product mantissa/exponent, aligned addend and operand
// classification from fma1 and performs the add, leading-one detection,
// normalization, rounding, sign selection, special-case result selection and
// flag generation.
//
// Fixes relative to the previous revision:
//   * SigNaN is now declared explicitly (it used to rely on an implicit net).
//   * The single-precision NaN/infinity exponent literal was written 8'h7f8,
//     which truncates to 8'hf8; it is now 8'hff.
//   * The single-precision rounded {exponent, mantissa} was one bit narrower
//     than the double-precision arm, so WExpTmp held the exponent shifted
//     right by one and the Overflow compare was wrong for single precision.
//     The arm is now padded to the full 65 bits and the result is packed from
//     WExp[7:0] / WMan[51:29].
//   * Round-to-nearest-even ties use the LSB of the selected format.
//   * The leading-one search no longer indexes Sum out of range.
//   * The unused debug net tmp was removed.
module fma2(
    input  logic [63:0]  FInput1M,        // X
    input  logic [63:0]  FInput2M,        // Y
    input  logic [63:0]  FInput3M,        // Z
    input  logic [2:0]   FrmM,            // rounding mode
    input  logic [105:0] ProdManM,        // product mantissa from fma1
    input  logic [161:0] AlignedAddendM,  // aligned addend from fma1
    input  logic [12:0]  ProdExpM,        // product exponent from fma1
    input  logic         FmtM,            // precision 1 = double 0 = single
    input  logic         AddendStickyM,   // sticky bit from addend alignment
    input  logic         KillProdM,       // product too small to matter
    input  logic [2:0]   FOpCtrlM,        // [2] = fmul, [1] = negate product, [0] = negate addend
    input  logic         XZeroM, YZeroM, ZZeroM,
    input  logic         XInfM, YInfM, ZInfM,
    input  logic         XNaNM, YNaNM, ZNaNM,
    output logic [63:0]  FmaResultM,      // packed result (single precision in the top half)
    output logic [4:0]   FmaFlagsM);      // {Invalid, 1'b0, Overflow, Underflow, Inexact}

    logic [51:0]  XMan, YMan, ZMan, WMan;
    logic [10:0]  XExp, YExp, ZExp, WExp;
    logic         XSgn, YSgn, ZSgn, WSgn, PSgn;
    logic [105:0] ProdMan2;
    logic [162:0] AlignedAddend2;
    logic [161:0] Sum;
    logic [162:0] SumTmp;
    logic [12:0]  SumExp;
    logic [12:0]  SumExpMinus1;
    logic [12:0]  SumExpTmp, SumExpTmpMinus1, WExpTmp;
    logic [53:0]  NormSum;
    logic [161:0] NormSumTmp;
    logic [8:0]   NormCnt;
    logic         NormSumSticky;
    logic         SumZero;
    logic         NegSum;
    logic         InvZ;
    logic         ResultDenorm;
    logic         Sticky;
    logic         Plus1, Minus1, Plus1Tmp, Minus1Tmp;
    logic         Invalid, Underflow, Overflow, Inexact;
    logic [8:0]   DenormShift;
    logic         ProdInf, ProdOf, ProdUf;
    logic [63:0]  FmaResultTmp;
    logic         SubBySmallNum;
    logic [63:0]  FInput3M2;
    logic         ZeroSgn, ResultSgn;
    logic         SigNaN;                 // some input is a signaling NaN
    logic         Gaurd, Round, Lsb;      // rounding bits of the selected format
    logic [8:0]   i;                      // number of leading zeros in Sum
    logic [12:0]  ManLen;                 // fraction length of the selected format
    logic [12:0]  MaxExp;                 // all-ones exponent of the selected format

    // Set addend to zero if FMUL instruction
    assign FInput3M2 = FOpCtrlM[2] ? 64'b0 : FInput3M;

    // Split inputs into the sign bit, mantissa, and exponent for readability
    assign XSgn = FInput1M[63];
    assign YSgn = FInput2M[63];
    assign ZSgn = FInput3M2[63]^FOpCtrlM[0]; // Negate Z if subtraction

    assign XExp = FmtM ? FInput1M[62:52]  : {3'b0, FInput1M[62:55]};
    assign YExp = FmtM ? FInput2M[62:52]  : {3'b0, FInput2M[62:55]};
    assign ZExp = FmtM ? FInput3M2[62:52] : {3'b0, FInput3M2[62:55]};

    assign XMan = FmtM ? FInput1M[51:0]  : {FInput1M[54:32], 29'b0};
    assign YMan = FmtM ? FInput2M[51:0]  : {FInput2M[54:32], 29'b0};
    assign ZMan = FmtM ? FInput3M2[51:0] : {FInput3M2[54:32], 29'b0};

    // Calculate the product's sign
    //   Negate product's sign if FNMADD or FNMSUB
    assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];

    ///////////////////////////////////////////////////////////////////////////
    // Addition
    ///////////////////////////////////////////////////////////////////////////

    // Negate Z when doing one of the following operations:
    //      -prod +  Z
    //       prod -  Z
    assign InvZ = ZSgn ^ PSgn;

    // Choose an inverted or non-inverted addend - the one is added later
    assign AlignedAddend2 = InvZ ? ~{1'b0,AlignedAddendM} : {1'b0,AlignedAddendM};
    // Kill the product if the product is too small to affect the addition (determined in fma1.sv)
    assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;

    // Do the addition
    //   - add one to negate if the addend was inverted
    //   - the 2 extra bits at the beginning and end are needed for rounding
    assign SumTmp = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ};

    // Is the sum negative
    assign NegSum = SumTmp[162];
    // If the sum is negative, negate the sum.
    assign Sum = NegSum ? -SumTmp[161:0] : SumTmp[161:0];

    ///////////////////////////////////////////////////////////////////////////
    // Leading one detector
    ///////////////////////////////////////////////////////////////////////////

    // i = number of zeros above the most significant set bit of Sum
    // (162 when Sum is zero). Scanning upward with a fixed-bound loop lets the
    // highest set bit win without ever indexing outside Sum.
    always_comb begin
        i = 9'd162;
        for (int k = 0; k <= 161; k++)
            if (Sum[k]) i = 9'(161 - k);
        NormCnt = i + 9'd1; // compute shift count (also shifts out the leading one)
    end

    ///////////////////////////////////////////////////////////////////////////
    // Normalization
    ///////////////////////////////////////////////////////////////////////////

    // Determine if the sum is zero
    assign SumZero = ~(|Sum);

    assign ManLen = FmtM ? 13'd52 : 13'd23;
    // Determine if the result is denormal
    assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-ManLen));

    // Determine the shift needed for denormal results
    assign SumExpTmpMinus1 = SumExpTmp-1;
    assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;

    // Normalize the sum
    assign NormSumTmp = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
    assign NormSum = NormSumTmp[161:108];
    // Calculate the sticky bit
    assign NormSumSticky = FmtM ? (|NormSumTmp[107:0]) : (|NormSumTmp[136:0]);
    assign Sticky = AddendStickyM | NormSumSticky;

    // Determine sum's exponent
    assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
    assign SumExp = SumZero ? 13'b0 :
                    ResultDenorm ? 13'b0 :
                    SumExpTmp;

    ///////////////////////////////////////////////////////////////////////////
    // Rounding
    ///////////////////////////////////////////////////////////////////////////

    // round to nearest even
    //      {Guard, Round, Sticky}
    //      0xx - do nothing
    //      100 - tie - Plus1 if the result LSB = 1
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      101/110/111 - Plus1
    // round to zero - do nothing
    //      - subtract 1 if a small number was supposed to be subtracted from the positive result
    // round to -infinity - Plus1 if negative
    //      - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      - subtract 1 if a small number was supposed to be subtracted from the positive result
    // round to infinity - Plus1 if positive
    //      - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      - subtract 1 if a small number was supposed to be subtracted from the negative result
    // round to nearest max magnitude
    //      {Guard, Round, Sticky}
    //      0xx - do nothing
    //      100 - tie - Plus1
    //          - don't add 1 if there was supposed to be a subtraction by a small number that didn't happen
    //      101/110/111 - Plus1

    // Double keeps NormSum[53:2] as the fraction, single keeps NormSum[53:31];
    // the LSB / guard / round bits sit directly below the kept field.
    assign Lsb   = FmtM ? NormSum[2] : NormSum[31];
    assign Gaurd = FmtM ? NormSum[1] : NormSum[30];
    assign Round = FmtM ? NormSum[0] : NormSum[29];
    // Determine if the result was supposed to be subtracted by a small number
    assign SubBySmallNum = AddendStickyM&InvZ&~NormSumSticky;

    always_comb begin
        // Determine if you add 1
        case (FrmM)
            3'b000: Plus1Tmp = Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&Lsb)); // round to nearest even
            3'b001: Plus1Tmp = 0; // round to zero
            3'b010: Plus1Tmp = WSgn & ~(SubBySmallNum); // round down
            3'b011: Plus1Tmp = ~WSgn & ~(SubBySmallNum); // round up
            3'b100: Plus1Tmp = (Gaurd & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky))); // round to nearest max magnitude
            default: Plus1Tmp = 1'bx;
        endcase
        // Determine if you subtract 1
        case (FrmM)
            3'b000: Minus1Tmp = 0; // round to nearest even
            3'b001: Minus1Tmp = SubBySmallNum; // round to zero
            3'b010: Minus1Tmp = ~WSgn & SubBySmallNum; // round down
            3'b011: Minus1Tmp = WSgn & SubBySmallNum; // round up
            3'b100: Minus1Tmp = 0; // round to nearest max magnitude
            default: Minus1Tmp = 1'bx;
        endcase
    end

    // If an answer is exact don't round
    assign Plus1  = (Sticky | Gaurd | Round) ? Plus1Tmp  : 1'b0;
    assign Minus1 = (Sticky | Gaurd | Round) ? Minus1Tmp : 1'b0;

    // Compute rounded result. Both arms are 65 bits wide (13-bit exponent +
    // 52-bit mantissa field) so a mantissa carry ripples into WExpTmp and
    // WExpTmp always holds the true exponent.
    assign {WExpTmp, WMan} = FmtM ? {SumExp, NormSum[53:2]} - {64'b0, Minus1} + {64'b0, Plus1}
                                  : {{SumExp, NormSum[53:31]} - {35'b0, Minus1} + {35'b0, Plus1}, 29'b0};
    assign WExp = WExpTmp[10:0];

    ///////////////////////////////////////////////////////////////////////////
    // Sign calculation
    ///////////////////////////////////////////////////////////////////////////

    // Determine the sign if the sum is zero
    //      if product underflows then use psign
    //      otherwise
    //          if cancellation then 0 unless round to -inf
    //          otherwise psign
    assign ZeroSgn = Underflow & ~ResultDenorm ? PSgn :
                     (PSgn^ZSgn ? FrmM == 3'b010 : PSgn);

    // Is the result negative
    //      if p - z is the Sum negative
    //      if -p + z is the Sum positive
    //      if -p - z then the Sum is negative
    assign ResultSgn = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
    assign WSgn = SumZero ? ZeroSgn : ResultSgn;

    ///////////////////////////////////////////////////////////////////////////
    // Select the result
    ///////////////////////////////////////////////////////////////////////////

    // Priority: NaN operands (quieted), generated NaN, infinite operands,
    // overflow, total underflow, killed product, then the rounded sum.
    assign FmaResultM = XNaNM ? (FmtM ? {XSgn, FInput1M[62:52], 1'b1,FInput1M[50:0]} : {XSgn, FInput1M[62:55], 1'b1,FInput1M[53:0]}) :
                        YNaNM ? (FmtM ? {YSgn, FInput2M[62:52], 1'b1,FInput2M[50:0]} : {YSgn, FInput2M[62:55], 1'b1,FInput2M[53:0]}) :
                        ZNaNM ? (FmtM ? {ZSgn, FInput3M2[62:52], 1'b1,FInput3M2[50:0]} : {ZSgn, FInput3M2[62:55], 1'b1,FInput3M2[53:0]}) :
                        Invalid ? (FmtM ? {WSgn, 11'h7ff, 1'b1, 51'b0} : {WSgn, 8'hff, 1'b1, 54'b0}) : // has to be before inf
                        XInfM ? {PSgn, FInput1M[62:0]} :
                        YInfM ? {PSgn, FInput2M[62:0]} :
                        ZInfM ? {ZSgn, FInput3M2[62:0]} :
                        Overflow ? (FmtM ? {WSgn, 11'h7ff, 52'b0} : {WSgn, 8'hff, 55'b0}) :
                        Underflow & ~ResultDenorm ? (FmtM ? {WSgn, 63'b0} - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {{WSgn, 31'b0} - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : //***do you need minus1?
                        KillProdM ? (FmtM ? FInput3M2 - {63'b0, (Minus1&AddendStickyM)} + {63'b0, (Plus1&AddendStickyM)} : {FInput3M2[63:32] - {31'b0, (Minus1&AddendStickyM)} + {31'b0, (Plus1&AddendStickyM)}, 32'b0}) : // has to be after Underflow
                        FmtM ? {WSgn,WExp,WMan} : {WSgn,WExp[7:0],WMan[51:29],32'b0};

    ///////////////////////////////////////////////////////////////////////////
    // Flags
    ///////////////////////////////////////////////////////////////////////////

    // Set Invalid flag for following cases:
    //   1) Inf - Inf
    //   2) 0 * Inf
    //   3) any input is a signaling NaN
    assign MaxExp = FmtM ? 13'd2047 : 13'd255;
    assign ProdOf = (ProdExpM >= MaxExp && ~ProdExpM[12]);
    assign ProdInf = ProdOf && ~XNaNM && ~YNaNM;
    // A NaN is signaling when the top fraction bit (bit 51 double, bit 54 single) is clear
    assign SigNaN = FmtM ? (XNaNM&~FInput1M[51]) | (YNaNM&~FInput2M[51]) | (ZNaNM&~FInput3M2[51]) : (XNaNM&~FInput1M[54]) | (YNaNM&~FInput2M[54]) | (ZNaNM&~FInput3M2[54]);
    assign Invalid = SigNaN | ((XInfM || YInfM || ProdInf) & ZInfM & (XSgn ^ YSgn ^ ZSgn)) | (XZeroM & YInfM) | (YZeroM & XInfM);

    // Set Overflow flag if the number is too big to be represented
    assign Overflow = WExpTmp >= MaxExp & ~WExpTmp[12];

    // Set Underflow flag if the number is too small to be represented in normal numbers
    assign ProdUf = KillProdM & ZZeroM;
    assign Underflow = SumExp[12] | ProdUf;

    // Set Inexact flag if the result is different from what would be output given infinite precision
    assign Inexact = (Sticky|Overflow| (Gaurd|Round))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);

    // Combine flags
    //   - FMA can't set the Divide by zero flag
    //   - Don't set the underflow flag if the result is exact
    assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow & Inexact, Inexact};
endmodule

View File

@ -1,40 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: lop.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block implements a Leading One Predictor used to determine
// the normalization shift count.
///////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Behavioral leading-one detector.
//   normcnt = number of zero bits above the most significant 1 of sum
//             (164 when sum is all zeros)
//   sumzero = 1 when sum is all zeros
module lza(sum, normcnt, sumzero);
/////////////////////////////////////////////////////////////////////////////

    input  logic [163:0] sum;     // sum
    output logic [8:0]   normcnt; // normalization shift count
    output logic         sumzero; // sum = 0

    // A real LOP uses a fast carry chain to find only the first 0.
    // It is an example of a parallel prefix algorithm.  For the sake
    // of simplicity, this model is behavioral instead.
    // A real LOP would also operate on the sources of the adder, not
    // the result!

    // Scan from the LSB upward so that the highest set bit is the last one to
    // update normcnt. The loop bounds are static, so sum is never indexed out
    // of range (the previous while-loop read sum[-1] when sum was zero).
    always_comb
        begin
            normcnt = 9'd164; // value reported for an all-zero sum
            for (int k = 0; k < 164; k++)
                if (sum[k]) normcnt = 9'(163 - k);
        end

    // Also check if sum is zero
    assign sumzero = ~(|sum);

endmodule

View File

@ -1,136 +0,0 @@
// Mantissa multiplier. Builds 27 partial products from "booth" instances and
// reduces them through four levels of add3comp2 / add4comp2 compressors plus
// a final add4comp2, leaving the product as two vectors (rE and sE) rather
// than a single carry-propagated value.
// NOTE(review): the 3-bit overlapping windows of yExt stepping by 2 look like
// radix-4 Booth recoding - confirm against booth.sv.
module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
/////////////////////////////////////////////////////////////////////////////
input logic [51:0] xman; // Fraction of multiplicand x
input logic [51:0] yman; // Fraction of multiplicand y
input logic xdenormE; // is x denormalized
input logic ydenormE; // is y denormalized
input logic xzeroE; // x is zero (per name); together with xdenormE it clears the assumed leading one
input logic yzeroE; // y is zero (per name); together with ydenormE it clears the assumed leading one
output logic [105:0] rE; // partial product 1 (final carry vector, shifted left by one)
output logic [105:0] sE; // partial product 2 (final sum vector)
wire [54:0] yExt; //y with a leading 0, the assumed 1 and an appended 0
wire [53:0] xExt; //x with a leading 0 and the assumed 1
wire [26:0][1:0] add1; // per-row 2-bit output of each booth instance, inserted below the next row
wire [26:0][54:0] pp; // per-row partial product from each booth instance
wire [26:0] e; // per-row bit from each booth instance, used in the row prefix
logic [106:0] tmpsE; // sum output of the final compressor
logic [17:0][106:0] lv1add; // level-1 compressor outputs
logic [11:0][106:0] lv2add; // level-2 compressor outputs
logic [7:0][106:0] lv3add; // level-3 compressor outputs
logic [3:0][106:0] lv4add; // level-4 compressor outputs
logic [21:0][107:0] carryTmp; // carry outputs of all 22 compressors
wire [26:0][106:0] acc; // the 27 aligned rows fed to the compressor tree
// wire [105:0] acc
genvar i;
// The leading bit is the assumed one; it is cleared for denormal or zero inputs
assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
// One booth instance per 3-bit window of yExt (windows overlap by one bit)
generate
for(i=0; i<27; i=i+1) begin
booth booth(.xExt(xExt), .choose(yExt[(i*2)+2:i*2]), .add1(add1[i]), .e(e[i]), .pp(pp[i]));
end
endgenerate
// Row i is shifted left by 2 bits per row; add1 of the previous row is placed
// directly below pp[i], and the prefix constant shrinks by 2 bits per row.
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
assign acc[26] = {pp[26],add1[25], 50'b0};
//*** resize adders
// Level 1: 27 rows -> 9 compressors -> 18 vectors. Each compressor's sum goes
// to the odd index; its carry, shifted left by one, goes to the even index.
// NOTE(review): the instance name add1 below shadows the wire add1 inside the
// generate scope - legal, but easy to misread.
generate
for(i=0; i<9; i=i+1) begin
add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
.carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
end
endgenerate
// Level 2: 18 vectors -> 6 compressors -> 12 vectors
generate
for(i=0; i<6; i=i+1) begin
add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
.carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
end
endgenerate
// Level 3: 12 vectors -> 4 compressors -> 8 vectors
generate
for(i=0; i<4; i=i+1) begin
add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
.carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
end
endgenerate
// Level 4: 8 vectors -> 2 four-input compressors -> 4 vectors
generate
for(i=0; i<2; i=i+1) begin
add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
end
endgenerate
// Final compressor: 4 vectors -> one sum (tmpsE) and one carry (carryTmp[21])
add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
.carry(carryTmp[21]), .sum(tmpsE));
// The product is handed on as two 106-bit vectors; the carry is shifted left by one
assign sE = tmpsE[105:0];
assign rE = {carryTmp[21][104:0], 1'b0};
// Disabled reference models kept for debugging: a plain sum of all rows and a
// direct multiply of the two mantissas.
// assign rE = 0;
// assign sE = acc[0] +
// acc[1] +
// acc[2] +
// acc[3] +
// acc[4] +
// acc[5] +
// acc[6] +
// acc[7] +
// acc[8] +
// acc[9] +
// acc[10] +
// acc[11] +
// acc[12] +
// acc[13] +
// acc[14] +
// acc[15] +
// acc[16] +
// acc[17] +
// acc[18] +
// acc[19] +
// acc[20] +
// acc[21] +
// acc[22] +
// acc[23] +
// acc[24] +
// acc[25] +
// acc[26];
// assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
// assign rE = 0;
endmodule

View File

@ -1,147 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: normalize.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block performs the normalization shift. It also
// generates the R and S bits for rounding. Finally, it
// handles the special case of a zero sum.
//
// v[53:2] is the fraction component of the prerounded result.
// It can be bypassed back to the X or Z inputs of the FMAC
// for back-to-back operations.
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Normalization shifter of the legacy FMA.
// Shifts the adder output so the leading one is in position, then extracts
//   v            - 54 bits: normalized fraction plus the R and S bits
//   sticky       - OR of everything shifted below v, ORed with the addend sticky bsM
//   de0          - 13-bit exponent value chosen by the selected case
//   resultdenorm - set when de0 is zero or negative (see each branch)
// Two regimes are handled, selected by the alignment count:
//   aligncntM <= 2 : product anchored or cancellation, shift by normcnt / sumshiftM
//   aligncntM >  2 : addend anchored, shift by sumshiftM-2 and pick the case
//                    from the top bits of the shifted sum
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
    xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);
/////////////////////////////////////////////////////////////////////////////
    input logic [163:0] sum;           // sum
    input logic [62:52] zexp;          // exponent field of Z
    input logic [8:0]   normcnt;       // normalization shift count
    input logic [12:0]  aeM;           // product exponent
    input logic [12:0]  aligncntM;     // alignment shift count
    input logic [8:0]   sumshiftM;     // shift count used for addend-anchored / denormal cases
    input logic         sumshiftzeroM; // when set, sum is used unshifted
    input logic         sumzero;       // sum is zero
    input logic         bsM;           // sticky bit for addend
    input logic         xdenormM;      // Input X is denormalized
    input logic         ydenormM;      // Input Y is denormalized
    input logic         zdenormM;      // Input Z is denormalized
    input logic         xzeroM;
    input logic         yzeroM;
    input logic         zzeroM;
    output logic        sticky;        // sticky bit
    output logic [12:0] de0;           // exponent selected for the result
    output logic        resultdenorm;  // result is denormalized
    output logic [53:0] v;             // normalized sum, R, S bits

    // Internal nodes
    logic [163:0] sumshifted;          // shifted sum
    logic [9:0]   sumshifttmp;         // sumshiftM - 2, bit 9 set when that went negative
    logic [163:0] sumshiftedtmp;       // shifted sum (unused)
    logic         isShiftLeft1;
    logic         tmp, tmp1, tmp2, tmp3, tmp4, tmp5; // debug taps of the case conditions

    // When the sum is zero, normalization does not apply and only the
    // sticky bit must be computed.  Otherwise, the sum is right-shifted
    // and the R and S bits (v[1] and v[0], respectively) are assigned.
    // The R bit is also set on denormalized numbers where the exponent
    // was computed to be exactly -1023 and the L bit was set.  This
    // is required for correct rounding up of multiplication results.
    // The sticky bit calculation is actually built into the shifter and
    // does not require a true subtraction shown in the model.

    assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
    // assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
    always_comb
        begin
        // d = aligncntM
        // l = normcnt
        // p = 53
        // ea + eb = aeM
        // set d<=2 to d<=0

        // Defaults for nets that are only written in the addend-anchored
        // branch; without them always_comb would infer latches.
        sumshifttmp = 10'b0;
        tmp1 = 1'b0;
        tmp2 = 1'b0;
        tmp3 = 1'b0;
        tmp4 = 1'b0;
        tmp5 = 1'b0;

        if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
            // product anchored or cancellation
            if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
                //normal result
                de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
                resultdenorm = |sum & ~|de0 | de0[12];
                // if z is zero then there was a 56 bit shift of the product
                sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
                v = sumshifted[162:109];
                sticky = (|sumshifted[108:0]) | bsM;
                //de0 = aeM-normcnt+2-1023;
            end else begin
                // exponent below emin: shift by the exponent instead and force a denormal result
                sumshifted = sum << (13'd1080+aeM);
                v = sumshifted[162:109];
                sticky = (|sumshifted[108:0]) | bsM;
                resultdenorm = 1;
                de0 = 0;
            end
        end else begin // extract normalized bits
            sumshifttmp = {1'b0,sumshiftM} - 2;
            // a negative sumshifttmp (bit 9 set) means no shift is applied
            sumshifted = sumshifttmp[9] ? sum : sum << sumshifttmp;
            tmp1 = (sumshifted[163] & ~sumshifttmp[9]);
            tmp2 = ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]);
            tmp3 = (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1]));
            tmp4 = sumshifted[160];
            tmp5 = sumshifted[159];
            // for some reason use exp = zexp + {0,1,2}
            // the book says exp = zexp + {-1,0,1}
            if(sumshiftzeroM) begin
                v = sum[162:109];
                sticky = (|sum[108:0]) | bsM;
                de0 = {2'b0,zexp};
            end else if(sumshifted[163] & ~sumshifttmp[9])begin
                v = sumshifted[162:109];
                sticky = (|sumshifted[108:0]) | bsM;
                de0 = {2'b0,zexp} +13'd2;
            end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
                v = sumshifted[161:108];
                sticky = (|sumshifted[107:0]) | bsM;
                de0 = {2'b0,zexp}+13'd1;
            end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
                v = sumshifted[160:107];
                sticky = (|sumshifted[106:0]) | bsM;
                //de0 = zexp-1;
                de0 = {2'b0,zexp}+{12'b0,zdenormM};
            end else if(sumshifted[160]& ~zdenormM) begin
                de0 = {2'b0,zexp}-13'b1;
                v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
                sticky = (|sumshifted[105:0]) | bsM;
                //de0 = zexp-1;
            end else if(sumshifted[159]& ~zdenormM) begin
                //v = sumshifted[158:105];
                de0 = {2'b0,zexp}-13'd2;
                v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
                sticky = (|sumshifted[104:0]) | bsM;
                //de0 = zexp-1;
            end else if(zdenormM) begin
                v = sumshifted[160:107];
                sticky = (|sumshifted[106:0]) | bsM;
                //de0 = zexp-1;
                de0 = {{2{zexp[62]}},zexp};
            end else begin
                de0 = 0;
                sumshifted = sum << sumshiftM-1; // p+2+l
                v = sumshifted[162:109];
                sticky = (|sumshifted[108:0]) | bsM;
            end
            resultdenorm = (~|de0 | de0[12]);
        end
        end
    // shift sum left by normcnt, filling the right with zeros
    //assign sumshifted = sum << normcnt;
endmodule

View File

@ -1,124 +0,0 @@
/////////////////////////////////////////////////////////////////////////////
// Block Name: round.v
// Author: David Harris
// Date: 11/2/1995
//
// Block Description:
// This block is responsible for rounding the normalized result of
// the FMAC. Because prenormalized results may be bypassed back to
// the FMAC X and Z inputs, rounding does not appear in the critical
// path of most floating point code. This is good because rounding
// requires an entire 52 bit carry-propagate half-adder delay.
//
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
// muxed in to form the actual result for register file writeback. This
// saves a mux from the writeback path.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
// Rounding and special-result mux for the fraction of the FMA result.
//   * plus1 decides, per rounding mode, whether the 52-bit fraction v[53:2]
//     is incremented.
//   * specialsel/specialres override the rounded fraction for invalid, NaN,
//     overflow, infinite-operand and underflow cases.
//   * infinity tells whether an overflow produces infinity (fraction 0) or
//     the largest finite number (fraction all ones).
// FrmM encoding used here: 000 nearest-even, 001 toward zero, 010 down,
// 011 up, 100 nearest-max-magnitude.
//
// Fixes relative to the previous revision:
//   * infinity was |FrmM, which returned the largest finite number for
//     round-to-nearest-even and infinity for round-toward-zero. It now follows
//     the IEEE 754 overflow rule: nearest modes give infinity, toward-zero
//     gives the largest finite number, and the directed modes give infinity
//     only when rounding away from zero for the result's sign.
//   * round-to-nearest-max-magnitude ties now round away from zero for both
//     signs (it previously did so only for positive results).
module round(v, sticky, FrmM, wsign,
    FmaFlagsM, inf, nanM, xnanM, ynanM, znanM,
    xman, yman, zman,
    wman, infinity, specialsel,expplus1);
/////////////////////////////////////////////////////////////////////////////
    input logic [53:0]  v;          // normalized sum, R, S bits
    input logic         sticky;     // sticky bit
    input logic [2:0]   FrmM;       // rounding mode
    input logic         wsign;      // Sign of result
    input logic [4:0]   FmaFlagsM;  // [4] invalid, [2] overflow, [1] underflow
    input logic         inf;        // Some input is infinity
    input logic         nanM;       // Some input is NaN
    input logic         xnanM;      // X is NaN
    input logic         ynanM;      // Y is NaN
    input logic         znanM;      // Z is NaN
    input logic [51:0]  xman;       // fraction of X
    input logic [51:0]  yman;       // fraction of Y
    input logic [51:0]  zman;       // fraction of Z
    output logic [51:0] wman;       // rounded result of FMAC
    output logic        infinity;   // Generate infinity on overflow
    output logic        specialsel; // Select special result
    output logic        expplus1;   // rounding carried out of the fraction

    // Internal nodes
    logic       plus1;       // Round by adding one
    wire [52:0] v1;          // Result + 1 (for rounding)
    wire [51:0] specialres;  // Result of exceptional case
    wire [51:0] infinityres; // Infinity or largest real number
    wire [51:0] nanres;      // Propagated or generated NaN

    // Compute if round should occur.  This equation is derived from
    // the rounding tables.
    //   round to infinity  - plus1 if positive
    //   round to -infinity - plus1 if negative
    //   round to zero      - do nothing
    //   round to nearest even
    //       {v[1], v[0], sticky}
    //       0xx - do nothing
    //       100 - tie - plus1 if v[2] = 1
    //       101/110/111 - plus1
    //   round to nearest max magnitude
    //       0xx - do nothing
    //       1xx - plus1 (ties go away from zero regardless of sign)
    always_comb begin
        case (FrmM)
            3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2]))); // round to nearest even
            3'b001: plus1 = 0;       // round to zero
            3'b010: plus1 = wsign;   // round down
            3'b011: plus1 = ~wsign;  // round up
            3'b100: plus1 = v[1];    // round to nearest max magnitude
            default: plus1 = 1'bx;
        endcase
    end

    // Compute rounded result
    assign v1 = v[53:2] + 1;

    // Determine special result in event of selection of a result from
    // another FPU functional unit, infinity, NaN, or underflow.
    // The special result mux is a 4:1 mux that should not appear in the
    // critical path of the machine.  It is not priority encoded, despite
    // the code below suggesting otherwise.  Also, several of the identical data
    // inputs to the wide muxes can be combined at the expense of more
    // complicated non-critical control in the circuit implementation.
    assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || // overflow underflow invalid
                        nanM || inf;
    assign specialres = FmaFlagsM[4] | nanM ? nanres : // invalid
                        FmaFlagsM[2] ? infinityres :   // overflow
                        inf ? 52'b0 :
                        FmaFlagsM[1] ? 52'b0 : 52'bx;  // underflow

    // Overflow is handled differently for different rounding modes:
    // the result is either infinity or the maximum finite number.
    assign infinity = (FrmM == 3'b000) |            // nearest even   -> infinity
                      (FrmM == 3'b100) |            // nearest max mag -> infinity
                      ((FrmM == 3'b011) & ~wsign) | // round up, positive result
                      ((FrmM == 3'b010) &  wsign);  // round down, negative result
    assign infinityres = infinity ? 52'b0 : {52{1'b1}};

    // Invalid operations produce a quiet NaN. The result should
    // propagate an input if the input is NaN. The top fraction bit is forced
    // to 1 so that a propagated NaN is always quiet.
    // IEEE 754-2008 section 6.2.3 states:
    // "If two or more inputs are NaN, then the payload of the resulting NaN should be
    // identical to the payload of one of the input NaNs if representable in the destination
    // format. This standard does not specify which of the input NaNs will provide the payload."
    assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0})); // KEP 210112 add the 1 to make NaNs quiet

    // Select result with 4:1 mux
    // If the sum is zero and we round up, there is a special case in
    // which we produce a massive loss of significance and trap to software.
    // It is handled in the exception unit.
    assign expplus1 = v1[52] & ~specialsel & plus1;
    assign wman = specialsel ? specialres : (plus1 ? v1[51:0] : v[53:2]);
endmodule

View File

@ -1,111 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: sign.v
// Author: David Harris
// Date: 12/1/1995
//
// Block Description:
// This block manages the signs of the numbers.
// 1 = negative
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
            sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
/////////////////////////////////////////////////////////////////////////////
  // Sign logic for the fused multiply-add W = X*Y + Z (1 = negative).
  // Produces the controls that tell the adder whether to invert the addend
  // and whether to negate the sum, and selects the sign of the final result.

  input  logic        xsign;      // Sign of X
  input  logic        ysign;      // Sign of Y
  input  logic        zsign;      // Sign of Z
  input  logic        isAdd;      // Selects which zero-sign rule is used (see zerosign)
  input  logic        negsum0;    // Sum in +0 mode is negative
  input  logic        negsum1;    // Sum in +1 mode is negative
  input  logic        bsM;        // Sticky bit from addend (unused here; only the
                                  // commented-out legacy logic below referenced it)
  input  logic [2:0]  FrmM;       // Rounding mode (3'b010 is treated as round toward -inf)
  input  logic [4:0]  FmaFlagsM;  // Exception flags; bit 4 forces a positive result and
                                  // bit 1 selects psign for a zero result (the original
                                  // comments call these "invalid" and "underflow")
  input  logic        sumzero;    // Sum = 0
  input  logic        zinfM;      // Z = Inf (selects zsign as the infinity sign)
  input  logic        inf;        // Some input = Inf
  output logic        wsign;      // Sign of W
  output logic        invz;       // Invert addend into adder
  output logic        negsum;     // Negate result of adder
  output logic        selsum1;    // Select +1 mode from compound adder

  // Internal nodes
  wire                psign;      // sign of the product X*Y (was an implicit net)
  wire                zerosign;   // sign if result = 0
  wire                sumneg;     // sign of a non-zero, finite sum
  wire                infsign;    // sign if result = Inf

  // Compute sign of product
  assign psign = xsign ^ ysign;

  // Invert addend if sign of Z is different from sign of product
  // (effective subtraction).
  assign invz = (zsign ^ psign);
  assign selsum1 = invz;

  // Negate the sum if it is negative, using whichever adder mode was selected.
  assign negsum = (selsum1&negsum1) | (~selsum1&negsum0);

  // Is the sum negative?
  //   -p + z (invz, zsign clear): negative when ~negsum1   [second term]
  //    p - z (invz, zsign set)  : negative when negsum1    [first term]
  //   -p - z                    : always negative          [third term]
  assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);

  // Legacy selection logic, kept for reference:
  //always @(invz or negsum0 or negsum1 or bsM or ps)
  // begin
  // if (~invz) begin // both inputs have same sign
  // negsum = 0;
  // selsum1 = 0;
  // end else if (bsM) begin // sticky bit set on addend
  // selsum1 = 0;
  // negsum = negsum0;
  // end else if (ps) begin // sticky bit set on product
  // selsum1 = 1;
  // negsum = negsum1;
  // end else begin // both sticky bits clear
  // //selsum1 = negsum1; // KEP 210113-10:44 Selsum1 was adding 1 to values that were multiplied by 0
  // selsum1 = ~negsum1; //original
  // negsum = negsum1;
  // end
  //end

  // Compute sign of result
  // This involves a special case when the sum is zero:
  //   x+x retains the same sign as x even when x = +/- 0.
  //   otherwise, x-x = +0 unless in the RM mode when x-x = -0
  // There is also a special case for NaNs and invalid results;
  // the sign of the NaN produced is forced to be 0.
  // Sign calculation is not in the critical path so the cases
  // can be tolerated.
  // IEEE 754-2008 section 6.3 states
  // "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
  // also pertaining to negZero it states:
  // "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
  // shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
  // sum/difference shall be -0. However, x+x = x-(-x) retains the same sign as x even when x is zero."
  //assign zerosign = (~invz && killprodM) ? zsign : rm;//***look into
  // assign zerosign = (~invz && killprodM) ? zsign : 0;

  // Zero sign
  //   if FmaFlagsM[1] is set (product underflow) then use psign
  //   otherwise, when isAdd is set:
  //     signs of product and addend differ : 1 only when FrmM == 3'b010
  //     signs agree                        : psign
  //   otherwise (isAdd clear):
  //     signs of product and addend differ : psign
  //     signs agree                        : 1 only when FrmM == 3'b010
  assign zerosign = FmaFlagsM[1] ? psign :
                    (isAdd ? (psign^zsign ? FrmM == 3'b010 : psign) :
                             (psign^zsign ? psign : FrmM == 3'b010));

  assign infsign = zinfM ? zsign : psign; //KEP 210112 keep the correct sign when result is infinity
  //assign infsign = xinfM ? (yinfM ? psign : xsign) : yinfM ? ysign : zsign;//original

  // Priority: FmaFlagsM[4] (forced +) > infinity > exact zero > ordinary sum.
  assign wsign = FmaFlagsM[4] ? 0 : (inf ? infsign :(sumzero ? zerosign : sumneg));
endmodule

View File

@ -1,67 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// Block Name: special.v
// Author: David Harris
// Date: 12/2/1995
//
// Block Description:
// This block implements special case handling for unusual operands (e.g.
// 0, NaN, denormalize, infinity). The block consists of zero/one detectors.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
               xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
/////////////////////////////////////////////////////////////////////////////
  // Classifies three 64-bit operands as zero / NaN / denormal / infinity
  // by inspecting bits 62:52 and bits 51:0. Bit 63 is ignored throughout.

  input  logic [63:0] ReadData1E;   // operand X
  input  logic [63:0] ReadData2E;   // operand Y
  input  logic [63:0] ReadData3E;   // operand Z
  output logic        xzeroE;       // X: bits 62:0 all zero
  output logic        yzeroE;       // Y: bits 62:0 all zero
  output logic        zzeroE;       // Z: bits 62:0 all zero
  output logic        xnanE;        // X is NaN
  output logic        ynanE;        // Y is NaN
  output logic        znanE;        // Z is NaN
  output logic        xdenormE;     // X is denormalized
  output logic        ydenormE;     // Y is denormalized
  output logic        zdenormE;     // Z is denormalized
  output logic        xinfE;        // X is infinity
  output logic        yinfE;        // Y is infinity
  output logic        zinfE;        // Z is infinity

  // One set of detectors per operand, shared by all four classifications.
  logic xExpOnes, xExpNone, xFracNone;
  logic yExpOnes, yExpNone, yFracNone;
  logic zExpOnes, zExpNone, zFracNone;

  assign xExpOnes  =  &ReadData1E[62:52];   // bits 62:52 all ones
  assign xExpNone  = ~|ReadData1E[62:52];   // bits 62:52 all zeros
  assign xFracNone = ~|ReadData1E[51:0];    // bits 51:0 all zeros

  assign yExpOnes  =  &ReadData2E[62:52];
  assign yExpNone  = ~|ReadData2E[62:52];
  assign yFracNone = ~|ReadData2E[51:0];

  assign zExpOnes  =  &ReadData3E[62:52];
  assign zExpNone  = ~|ReadData3E[62:52];
  assign zFracNone = ~|ReadData3E[51:0];

  // NaN: 62:52 all ones, 51:0 non-zero
  assign xnanE = xExpOnes & ~xFracNone;
  assign ynanE = yExpOnes & ~yFracNone;
  assign znanE = zExpOnes & ~zFracNone;

  // Denormal: 62:52 all zeros, 51:0 non-zero
  assign xdenormE = xExpNone & ~xFracNone;
  assign ydenormE = yExpNone & ~yFracNone;
  assign zdenormE = zExpNone & ~zFracNone;

  // Infinity: 62:52 all ones, 51:0 all zeros
  assign xinfE = xExpOnes & xFracNone;
  assign yinfE = yExpOnes & yFracNone;
  assign zinfE = zExpOnes & zFracNone;

  // Zero: 62:52 and 51:0 both all zeros.
  // Denormals are deliberately NOT reported as zero (KATHERINE 21/01/11:
  // the earlier "|| xdenormE" term made results with a denormal operand
  // come out as zero).
  assign xzeroE = xExpNone & xFracNone;
  assign yzeroE = yExpNone & yFracNone;
  assign zzeroE = zExpNone & zFracNone;
endmodule

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,417 @@
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
// or if (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 7: Round the result.//
// Step 8: Put sum onto output.
//
module faddcvt(
  input  logic        clk,
  input  logic        reset,
  input  logic        FlushM,               // clears the E->M pipeline registers
  input  logic        StallM,               // holds the E->M pipeline registers
  input  logic [63:0] SrcXE,                // 1st input operand (A)
  input  logic [63:0] SrcYE,                // 2nd input operand (B)
  input  logic [3:0]  FOpCtrlE, FOpCtrlM,   // Function opcode, per stage
  input  logic        FmtE, FmtM,           // Precision select, per stage (both stage
                                            // modules derive P = ~Fmt from it)
  input  logic [2:0]  FrmM,                 // Rounding mode
  output logic [63:0] FAddResM,             // Result of operation
  output logic [4:0]  FAddFlgM);            // IEEE exception flags

  // Two-stage add/convert unit: fpuaddcvt1 runs on the E-suffixed signals,
  // its outputs are registered, and fpuaddcvt2 finishes on the M-suffixed copies.

  // ---- outputs of stage 1 (E) -------------------------------------------
  logic [63:0] AddSumE, AddSumTcE;
  logic [63:0] AddFloat1E, AddFloat2E;
  logic [11:0] AddExp1DenormE, AddExp2DenormE;
  logic [10:0] AddExponentE, AddExpPostSumE;
  logic [3:0]  AddSelInvE;
  logic        AddCorrSignE, AddSignAE;
  logic        AddOp1NormE, AddOp2NormE;
  logic        AddOpANormE, AddOpBNormE;
  logic        AddInvalidE, AddDenormInE;
  logic        AddConvertE, AddSwapE;
  logic        AddNormOvflowE;

  // ---- registered copies feeding stage 2 (M) ----------------------------
  logic [63:0] AddSumM, AddSumTcM;
  logic [63:0] AddFloat1M, AddFloat2M;
  logic [11:0] AddExp1DenormM, AddExp2DenormM;
  logic [10:0] AddExponentM, AddExpPostSumM;
  logic [3:0]  AddSelInvM;
  logic        AddCorrSignM, AddSignAM;
  logic        AddOp1NormM, AddOp2NormM;
  logic        AddOpANormM, AddOpBNormM;
  logic        AddInvalidM, AddDenormInM;
  logic        AddConvertM, AddSwapM;
  logic        AddNormOvflowM;   //***registered but not connected to fpuaddcvt2

  // Pipeline registers advance whenever the M stage is not stalled.
  logic        RegEnM;
  assign RegEnM = ~StallM;

  // ---- stage 1 ------------------------------------------------------------
  fpuaddcvt1 fpadd1 (
    .SrcXE(SrcXE), .SrcYE(SrcYE), .FOpCtrlE(FOpCtrlE), .FmtE(FmtE),
    .AddFloat1E(AddFloat1E), .AddFloat2E(AddFloat2E),
    .AddExponentE(AddExponentE), .AddExpPostSumE(AddExpPostSumE),
    .AddExp1DenormE(AddExp1DenormE), .AddExp2DenormE(AddExp2DenormE),
    .AddSumE(AddSumE), .AddSumTcE(AddSumTcE), .AddSelInvE(AddSelInvE),
    .AddCorrSignE(AddCorrSignE), .AddSignAE(AddSignAE),
    .AddOp1NormE(AddOp1NormE), .AddOp2NormE(AddOp2NormE),
    .AddOpANormE(AddOpANormE), .AddOpBNormE(AddOpBNormE),
    .AddInvalidE(AddInvalidE), .AddDenormInE(AddDenormInE),
    .AddConvertE(AddConvertE), .AddSwapE(AddSwapE),
    .AddNormOvflowE(AddNormOvflowE));

  // ---- E -> M pipeline registers ------------------------------------------
  flopenrc #(64) EMRegAdd1(clk, reset, FlushM, RegEnM, AddSumE,        AddSumM);
  flopenrc #(64) EMRegAdd2(clk, reset, FlushM, RegEnM, AddSumTcE,      AddSumTcM);
  flopenrc #(11) EMRegAdd3(clk, reset, FlushM, RegEnM, AddExpPostSumE, AddExpPostSumM);
  flopenrc #(64) EMRegAdd4(clk, reset, FlushM, RegEnM, AddFloat1E,     AddFloat1M);
  flopenrc #(64) EMRegAdd5(clk, reset, FlushM, RegEnM, AddFloat2E,     AddFloat2M);
  flopenrc #(12) EMRegAdd6(clk, reset, FlushM, RegEnM, AddExp1DenormE, AddExp1DenormM);
  flopenrc #(12) EMRegAdd7(clk, reset, FlushM, RegEnM, AddExp2DenormE, AddExp2DenormM);
  flopenrc #(11) EMRegAdd8(clk, reset, FlushM, RegEnM, AddExponentE,   AddExponentM);
  // 4-bit AddSelInv plus eleven single-bit controls = 15 bits
  flopenrc #(15) EMRegAdd9(clk, reset, FlushM, RegEnM,
    {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE,
     AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
    {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM,
     AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});

  // ---- stage 2 ------------------------------------------------------------
  fpuaddcvt2 fpadd2 (
    .FrmM(FrmM), .FOpCtrlM(FOpCtrlM), .FmtM(FmtM),
    .AddSumM(AddSumM), .AddSumTcM(AddSumTcM),
    .AddFloat1M(AddFloat1M), .AddFloat2M(AddFloat2M),
    .AddExp1DenormM(AddExp1DenormM), .AddExp2DenormM(AddExp2DenormM),
    .AddExponentM(AddExponentM), .AddExpPostSumM(AddExpPostSumM),
    .AddSelInvM(AddSelInvM),
    .AddOp1NormM(AddOp1NormM), .AddOp2NormM(AddOp2NormM),
    .AddOpANormM(AddOpANormM), .AddOpBNormM(AddOpBNormM),
    .AddInvalidM(AddInvalidM), .AddDenormInM(AddDenormInM),
    .AddSignAM(AddSignAM), .AddCorrSignM(AddCorrSignM),
    .AddConvertM(AddConvertM), .AddSwapM(AddSwapM),
    .FAddResM(FAddResM), .FAddFlgM(FAddFlgM));
endmodule
// fpuaddcvt1: first half of the floating-point add/subtract/convert datapath.
// Unpacks and classifies the operands, optionally swaps them so the one with
// the larger exponent is operand A, aligns operand B, and produces both
// A+/-B (AddSumE) and B-A (AddSumTcE) along with the control bits that the
// second half (fpuaddcvt2) consumes.
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
input logic [63:0] SrcXE; // 1st input operand (A)
input logic [63:0] SrcYE; // 2nd input operand (B)
input logic [3:0] FOpCtrlE; // Function opcode
input logic FmtE; // Result Precision (1 for double, 0 for single)
// P is the inverse of FmtE; it is only passed on to convert_inputs.
wire P;
assign P = ~FmtE;
wire [63:0] IntValue; // integer source operand for int -> fp conversion
wire [11:0] exp1, exp2; // zero-extended 11-bit exponent fields
wire [11:0] exp_diff1, exp_diff2; // exp1-exp2 and exp2-exp1
wire [11:0] exp_shift; // the difference selected by AddSwapE
wire [51:0] mantissaA; // fraction of the (post-swap) A operand
wire [56:0] mantissaA1; // A unpacked to 57 bits
wire [63:0] mantissaA3; // A operand as presented to the 64-bit adders
wire [51:0] mantissaB; // fraction of the (post-swap) B operand
wire [56:0] mantissaB1, mantissaB2; // B unpacked, then aligned
wire [63:0] mantissaB3; // B operand as presented to the 64-bit adders
wire exp_gt63; // alignment shift does not fit in 6 bits
wire Sticky_out; // OR of the bits shifted out during alignment
wire sub; // effective operation is subtraction (from exception unit)
wire zeroB; // operand B is forced to zero
wire [5:0] align_shift; // saturated alignment shift amount
output logic [63:0] AddFloat1E;
output logic [63:0] AddFloat2E;
output logic [10:0] AddExponentE;
output logic [10:0] AddExpPostSumE;
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
output logic [63:0] AddSumE, AddSumTcE;
output logic [3:0] AddSelInvE;
output logic AddCorrSignE;
output logic AddSignAE;
output logic AddOp1NormE, AddOp2NormE;
output logic AddOpANormE, AddOpBNormE;
output logic AddInvalidE;
output logic AddDenormInE;
// output logic exp_valid;
output logic AddConvertE;
output logic AddSwapE;
output logic AddNormOvflowE;
wire [5:0] ZP_mantissaA; // leading-zero count of mantissaA
wire [5:0] ZP_mantissaB; // leading-zero count of mantissaB
wire ZV_mantissaA; // second lz52 output; not read anywhere in this module
wire ZV_mantissaB; // second lz52 output; not read anywhere in this module
// Convert the input operands to their appropriate forms based on
// the original operands, the FOpCtrlE, and their precision P.
// Single precision inputs are converted to double precision
// and the sign of the first operand is set appropriately based on
// if the operation is absolute value or negation.
convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "AddSelInvE" is used in
// the third pipeline stage to select the result. Also, AddOp1NormE
// and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
// sub is one if the effective operation is subtraction.
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
AddFloat1E, AddFloat2E, FOpCtrlE);
// Perform Exponent Subtraction (used for alignment). For performance
// both exponent subtractions are performed in parallel. This was
// changed to a behavior level to allow the tools to try to optimize
// the two parallel additions. The input values are zero-extended to 12
// bits prior to performing the addition.
assign exp1 = {1'b0, AddFloat1E[62:52]};
assign exp2 = {1'b0, AddFloat2E[62:52]};
assign exp_diff1 = exp1 - exp2;
// NOTE(review): with a denormal input the top bit of each operand of this
// subtraction is the operand's sign bit rather than zero - confirm intent.
assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
// The second operand (B) should be set to zero, if FOpCtrlE does not
// specify addition or subtraction
assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
// Swapped operands if zeroB is not one and exp1 < exp2.
// Swapping causes exp2 to be used for the result exponent.
// Only the exponent of the larger operand is used to determine
// the final result.
assign AddSwapE = exp_diff1[11] & ~zeroB;
assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
// AddExpPostSumE is driven with the same expression as AddExponentE.
assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
// modified to 52 bits to detect leading zeroes on denormalized mantissas
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
// (mantissaA/B are post-swap, so the count paired with exp1/exp2 flips with AddSwapE)
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
// Determine the alignment shift and limit it to 63. If any bit from
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
| exp_shift[8] | exp_shift[7] | exp_shift[6];
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
// 001.M[51]M[50] ... M[1]M[0]00
// Unless the number has an exponent of zero, in which case it
// is unpacked as
// 000.00 ... 00
// This effectively flushes denormalized values to zero.
// The three bits to the left of the binary point prevent overflow
// and loss of sign information. The two bits to the right of the
// original mantissa form the "guard" and "round" bits that are used
// to round the result.
assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
// Perform mantissa alignment using a 57-bit barrel shifter
// If any of the bits shifted out are one, Sticky_out is set.
// The size of the barrel shifter could be reduced by two bits
// by not adding the leading two zeros until after the shift.
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
// Place either the sign-extended 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion)
assign IntValue [31:0] = SrcXE[31:0];
assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to
// IntVal and the prenormalized exponent is set to 1084. Otherwise,
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
// and the exponent value is left unchanged.
// Under denormalized cases, the exponent before the rounder is set to 1
// if the normal shift value is 11.
// (The exponent selection described above is performed in fpuaddcvt2.)
assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];
// Selection priority: FOpCtrlE[3] (AddFloat1E or its bitwise inverse),
// then denormal input (raw fraction), then conversion (IntValue),
// then the normal unpacked mantissa.
assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
// zeros.
// When FOpCtrlE[3] is set, mantissaB3 is the constant 64'h1 instead.
assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
// The sign of the result needs to be corrected if the true
// operation is subtraction and the input operands were swapped.
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
// Finds normal underflow result to determine whether to round final exponent down
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
// NOTE(review): fpuaddcvt2 recomputes this value from its own inputs and has
// the corresponding input port commented out, so this output looks unused there.
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
endmodule // fpuaddcvt1
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
// or if (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 7: Round the result.//
// Step 8: Put sum onto output.
//
// fpuaddcvt2: second half of the floating-point add/subtract/convert datapath.
// Takes the registered sums and control bits produced by fpuaddcvt1, picks the
// correctly-signed magnitude, normalizes it, and hands it to the rounder,
// which produces the final result and exception flags.
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
input [2:0] FrmM; // Rounding mode - specify values
input [3:0] FOpCtrlM; // Function opcode
input FmtM; // Result Precision. NOTE(review): this comment used to read "0 for double, 1 for single", but fpuaddcvt1 and fctrl document 1 = double, 0 = single - confirm
// input AddOvEnM; // Overflow trap enabled
// input AddUnEnM; // Underflow trap enabled
input [63:0] AddSumM, AddSumTcM;
input [63:0] AddFloat1M;
input [63:0] AddFloat2M;
input [11:0] AddExp1DenormM, AddExp2DenormM;
input [10:0] AddExponentM, AddExpPostSumM; //exp_pre;
//input exp_valid;
input [3:0] AddSelInvM;
input AddOp1NormM, AddOp2NormM;
input AddOpANormM, AddOpBNormM;
input AddInvalidM;
input AddDenormInM;
input AddSignAM;
input AddCorrSignM;
input AddConvertM;
input AddSwapM;
// input AddNormOvflowM;
output [63:0] FAddResM; // Result of operation
output [4:0] FAddFlgM; // IEEE exception flags
wire AddDenormM; // Denormal on input or output (driven below, not exported)
// P is the inverse of FmtM; it is only passed on to the rounder.
wire P;
assign P = ~FmtM;
wire [10:0] exp_pre; // exponent handed to the rounder
wire [63:0] Result;
wire [63:0] sum_norm, sum_norm_w_bypass;
wire [5:0] norm_shift, norm_shift_denorm;
wire exp_valid;
wire DenormIO;
wire [4:0] FlagsIn;
wire Sticky_out; // declared but not used in this module
wire sign_corr;
wire zeroB; // declared but not used in this module
wire [10:0] AddExpPostSumM; // net declaration for the input port of the same name
wire mantissa_comp;
wire mantissa_comp_sum;
wire mantissa_comp_sum_tc;
wire Float1_sum_comp;
wire Float2_sum_comp;
wire Float1_sum_tc_comp;
wire Float2_sum_tc_comp;
wire normal_underflow;
wire [63:0] sum_corr;
logic AddNormOvflowM; // recomputed locally below rather than taken as an input
logic AddOvEnM; // Overflow trap enabled
logic AddUnEnM; // Underflow trap enabled
// Both trap enables are tied high.
assign AddOvEnM = 1'b1;
assign AddUnEnM = 1'b1;
// Exponent value pre-rounding with considerations for denormalized
// cases/conversion cases:
//   denormal input, norm_shift == 11 : exponent 1
//   denormal input otherwise         : the denormal-adjusted exponent chosen by AddSwapM
//   conversion                       : constant 11'b10000111100 (1084)
//   otherwise                        : AddExponentM
assign exp_pre = AddDenormInM ?
((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
: (AddConvertM ? 11'b10000111100 : AddExponentM);
// Finds normal underflow result to determine whether to round final exponent down
// Comparison between each float and the resulting sum of the primary cla adder/subtractor and cla subtractor
// Each *_comp is 0 when the operand's fraction is strictly greater than the low 52 bits of the sum.
assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1;
assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1;
// Determines the correct Float value to compare based on AddSwapM result
assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;
// Determines the correct comparison result based on operation and sign of resulting sum
assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
// If the signs are different and both operands aren't denormalized
// the normal underflow bit is needed and therefore updated.
// NOTE(review): the condition below uses ~^ (XNOR), i.e. it is true when the
// two sign bits are EQUAL, which contradicts "signs are different" - confirm.
assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;
// Determine the correct sign of the result
assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];
// If the sum is negative, use its two's complement instead.
// This value has to be 64-bits to correctly handle the
// case 10...00
// The first arm (denormal input, at least one normal operand, and
// equal signs with FOpCtrlM[0] set or opposite signs with it clear)
// selects the opposite way round from the default arm.
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
// Finds normal underflow result to determine whether to round final exponent down
//KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
lz64 lzd1 (norm_shift, exp_valid, sum_corr);
// Normalization shift is suppressed (forced to 0) for a denormal input when
// neither operand is normal or when normal_underflow is set.
assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift);
// Barrel shifter used for normalization. It takes as inputs the
// the corrected sum and the amount by which the sum should
// be shifted. It outputs the normalized sum.
// NOTE(review): this comment used to say "right shifted"; the instance name
// barrel_shifter_l64 and the leading-zero shift count suggest a left shift - confirm.
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
// When FOpCtrlM[3] is set the normalizer is bypassed: sum_corr is used
// directly, bitwise-inverted if FOpCtrlM[0] is also set.
assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
// Round the mantissa to a 52-bit value, with the leading one
// removed. If the result is a single precision number, the actual
// mantissa is in the upper 23 bits and the lower 29 bits are zero.
// At this point, normalization has already been performed, so we know
// exactly where the rounding point is. The rounding units also
// handles special cases and set the exception flags.
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
// help in processor reservation station detection of load/stores. In
// other words, the processor would like to know ahead of time that
// if the result is an exception then don't load or store.
rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
// Drive the final result and the exception flags (combinational; no registers here).
assign FAddResM = Result;
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
endmodule // fpuaddcvt2

View File

@ -64,38 +64,38 @@ module fctrl (
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.s.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.s.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.s.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.s.lu
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.s.w
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.s.wu
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.s.l
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.s.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_1_100_0010_00_00_0_0; // fcvt.w.s
2'b01: ControlsD = `FCTRLW'b1_1_100_0110_00_00_0_0; // fcvt.wu.s
2'b10: ControlsD = `FCTRLW'b1_1_100_1010_00_00_0_0; // fcvt.l.s
2'b11: ControlsD = `FCTRLW'b1_1_100_1110_00_00_0_0; // fcvt.lu.s
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.s
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.s
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.s
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.d.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.d.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.d.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.d.lu
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.s.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.d.w
2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.d.wu
2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.d.l
2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.d.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0010_00_00_0_0; // fcvt.w.d
2'b01: ControlsD = `FCTRLW'b1_0_100_0110_00_00_0_0; // fcvt.wu.d
2'b10: ControlsD = `FCTRLW'b1_0_100_1010_00_00_0_0; // fcvt.l.d
2'b11: ControlsD = `FCTRLW'b1_0_100_1110_00_00_0_0; // fcvt.lu.d
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.d
2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.d
2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.d
2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.d
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fcvt.d.s
7'b0100001: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
@ -109,7 +109,7 @@ module fctrl (
// Precision
// 0-single
// 1-double
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : Funct7D[0];
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
// div/sqrt
// fdiv = ???0
// fsqrt = ???1

View File

@ -0,0 +1,163 @@
// `include "wally-config.vh"
module fcvt (
input logic [63:0] X,
input logic [64-1:0] SrcAE,
input logic [3:0] FOpCtrlE,
input logic [2:0] FrmE,
input logic FmtE,
output logic [63:0] CvtResE,
output logic [4:0] CvtFlgE);
logic [10:0] XExp;
logic [51:0] XFrac;
logic XSgn;
logic [10:0] ResExp,TmpExp;
logic [51:0] ResFrac;
logic ResSgn;
logic [10:0] NormCnt;
logic [11:0] Bias; // 1023 for double, 127 for single
logic [7:0] Bits, SubBits;
logic [64+51:0] ShiftedManTmp;
logic [64+51:0] ShiftVal;
logic [64+1:0] ShiftedMan;
logic [64:0] RoundedTmp;
logic [63:0] Rounded;
logic [12:0] ExpVal, ShiftCnt;
logic [64-1:0] PosInt;
logic [64-1:0] CvtIntRes;
logic [63:0] CvtRes;
logic XFracZero, Of,Uf;
logic XExpMax;
logic XNaN, XDenorm, XInf, XZero;
logic Plus1,CalcPlus1, Guard, Round, LSB, Sticky;
logic SgnRes, In64;
logic Res64;
logic RoundMSB;
logic RoundSgn;
logic XExpZero;
// fcvt.w.s = 0010 -
// fcvt.wu.s = 0110 -
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.l.s = 1010 -
// fcvt.lu.s = 1110 -
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010 -
// fcvt.wu.d = 0110 -
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.l.d = 1010 -
// fcvt.lu.d = 1110 -
// fcvt.d.l = 1001 --
// fcvt.d.lu = 1101 --
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
assign XSgn = X[63];
assign XExp = FmtE ? X[62:52] : {3'b0, X[62:55]};
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
assign XExpZero = ~|XExp;
assign XFracZero = ~|XFrac;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
assign XNaN = XExpMax & ~XFracZero;
assign XDenorm = XExpZero & ~XFracZero;
assign XInf = XExpMax & XFracZero;
assign XZero = XExpZero & XFracZero;
assign Bias = FmtE ? 12'h3ff : 12'h7f;
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
assign SubBits = In64 ? 8'd64 : 8'd32;
assign Bits = Res64 ? 8'd64 : 8'd32;
assign ExpVal = XExp - Bias + XDenorm;
////////////////////////////////////////////////////////
logic [64-1:0] IntIn;
assign IntIn = FOpCtrlE[3] ? SrcAE : {SrcAE[31:0], 32'b0};
assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
// Leading one detector
logic [8:0] i;
always_comb begin
i = 0;
while (~PosInt[64-1-i] && i <= 64) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count
end
assign TmpExp = i==64 ? 0 : Bias + SubBits - NormCnt;
////////////////////////////////////////////
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : NormCnt;
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
//if shift = -1 then shift one bit right for round to nearest (shift over 2 never rounds)
// if the shift is negative add bit for sticky bit
// otherwise shift left
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {115'b0, ~XZero} : ShiftVal << ShiftCnt;
assign ShiftedMan = ShiftedManTmp[64+51:50];
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
// determine guard, round, and least significant bit of the result
assign Guard = FOpCtrlE[1] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42];
assign Round = FOpCtrlE[1] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
assign LSB = FOpCtrlE[1] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
always_comb begin
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = (XSgn&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
3'b011: CalcPlus1 = (~XSgn&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
end
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);
assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];
// Choose result
// double to unsigned long
// >2^64-1 or +inf or NaN - all 1's
// <0 or -inf - zero
// otherwise rounded result
//assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
assign CvtIntRes = Of ? FOpCtrlE[2] ? SgnRes ? {32'b0, {32{1'b1}}}: {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} :
Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
Rounded[64-1:0];
assign CvtRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
assign CvtResE = FOpCtrlE[0] ? CvtRes : CvtIntRes;
assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};
endmodule // fpadd

View File

@ -1,3 +1,231 @@
///////////////////////////////////////////////////////////////////////////////
// fma
//   Two-stage fused multiply-add wrapper.
//     1. fma1 works on the execute-stage operands (SrcXE/SrcYE/SrcZE).
//     2. Four flopenrc registers carry fma1's outputs from the E-suffixed
//        signals to their M-suffixed counterparts.
//     3. fma2 takes the registered values together with the memory-stage
//        operands and drives FMAResM / FMAFlgM.
//   fma1 is defined in this file; fma2 and flopenrc are defined elsewhere.
///////////////////////////////////////////////////////////////////////////////
module fma(
    input  logic        clk,
    input  logic        reset,
    input  logic        FlushM,
    input  logic        StallM,
    input  logic [63:0] SrcXE, SrcXM,       // X
    input  logic [63:0] SrcYE, SrcYM,       // Y
    input  logic [63:0] SrcZE, SrcZM,       // Z
    input  logic        FmtE, FmtM,         // precision 1 = double 0 = single
    input  logic [2:0]  FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic [2:0]  FrmM,               // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
    output logic [63:0] FMAResM,
    output logic [4:0]  FMAFlgM);

    // Values produced by fma1 (E suffix) and their registered copies (M suffix)
    logic [105:0] ProdManE, ProdManM;
    logic [161:0] AlignedAddendE, AlignedAddendM;
    logic [12:0]  ProdExpE, ProdExpM;
    logic         AddendStickyE, AddendStickyM;
    logic         KillProdE, KillProdM;
    logic         XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
    logic         XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
    logic         XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;

    // First stage: unpack operands, multiply mantissas, align the addend
    fma1 fma1 (
        .X(SrcXE),
        .Y(SrcYE),
        .Z(SrcZE),
        .FOpCtrlE(FOpCtrlE),
        .FmtE(FmtE),
        .ProdManE(ProdManE),
        .AlignedAddendE(AlignedAddendE),
        .ProdExpE(ProdExpE),
        .AddendStickyE(AddendStickyE),
        .KillProdE(KillProdE),
        .XZeroE(XZeroE),
        .YZeroE(YZeroE),
        .ZZeroE(ZZeroE),
        .XInfE(XInfE),
        .YInfE(YInfE),
        .ZInfE(ZInfE),
        .XNaNE(XNaNE),
        .YNaNE(YNaNE),
        .ZNaNE(ZNaNE));

    // E/M pipeline registers.
    // NOTE(review): flopenrc is not visible here; presumably FlushM clears the
    // register and ~StallM is its enable - confirm against its definition.
    flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
    flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
    flopenrc #(13)  EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
    flopenrc #(11)  EMRegFma4(clk, reset, FlushM, ~StallM,
                              {AddendStickyE, KillProdE,
                               XZeroE, YZeroE, ZZeroE,
                               XInfE, YInfE, ZInfE,
                               XNaNE, YNaNE, ZNaNE},
                              {AddendStickyM, KillProdM,
                               XZeroM, YZeroM, ZZeroM,
                               XInfM, YInfM, ZInfM,
                               XNaNM, YNaNM, ZNaNM});

    // Second stage: produces the final result and flags
    fma2 fma2 (
        .X(SrcXM),
        .Y(SrcYM),
        .Z(SrcZM),
        .FOpCtrlM(FOpCtrlM),
        .FrmM(FrmM),
        .FmtM(FmtM),
        .ProdManM(ProdManM),
        .AlignedAddendM(AlignedAddendM),
        .ProdExpM(ProdExpM),
        .AddendStickyM(AddendStickyM),
        .KillProdM(KillProdM),
        .XZeroM(XZeroM),
        .YZeroM(YZeroM),
        .ZZeroM(ZZeroM),
        .XInfM(XInfM),
        .YInfM(YInfM),
        .ZInfM(ZInfM),
        .XNaNM(XNaNM),
        .YNaNM(YNaNM),
        .ZNaNM(ZNaNM),
        .FMAResM(FMAResM),
        .FMAFlgM(FMAFlgM));

endmodule
///////////////////////////////////////////////////////////////////////////////
// fma1
//   First (execute) stage of the fused multiply-add unit. Purely
//   combinational: it
//     - unpacks X, Y and Z into sign / exponent / fraction,
//     - classifies each operand (zero, infinity, NaN),
//     - computes the product exponent and the 106-bit mantissa product,
//     - shifts the addend mantissa so it lines up with the product.
///////////////////////////////////////////////////////////////////////////////
module fma1(
    input  logic [63:0]  X,                      // X
    input  logic [63:0]  Y,                      // Y
    input  logic [63:0]  Z,                      // Z
    input  logic [2:0]   FOpCtrlE,               // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic         FmtE,                   // precision 1 = double 0 = single
    output logic [105:0] ProdManE,               // XMan * YMan
    output logic [161:0] AlignedAddendE,         // Z mantissa after the alignment shift
    output logic [12:0]  ProdExpE,               // X exponent + Y exponent - bias
    output logic         AddendStickyE,          // sticky bit produced by the alignment step
    output logic         KillProdE,              // product is too small to matter; treat it as zero
    output logic         XZeroE, YZeroE, ZZeroE, // operand is zero
    output logic         XInfE, YInfE, ZInfE,    // operand is infinity
    output logic         XNaNE, YNaNE, ZNaNE);   // operand is NaN

    logic [63:0]  Addend;                        // Z, or zero when FOpCtrlE[2] is set
    logic         XSgn, YSgn, ZSgn;              // sign bits
    logic [12:0]  XExp, YExp, ZExp;              // zero-extended biased exponents
    logic [51:0]  XFrac, YFrac, ZFrac;           // fraction fields
    logic [52:0]  XMan, YMan, ZMan;              // fraction with the leading bit prepended
    logic [12:0]  Bias;                          // 1023 for double, 127 for single
    logic         XExpZero, YExpZero, ZExpZero;  // exponent field is all zeros
    logic         XExpMax, YExpMax, ZExpMax;     // exponent field is all ones
    logic         XFracZero, YFracZero, ZFracZero; // fraction field is all zeros
    logic         XDenorm, YDenorm, ZDenorm;     // operand is denormal
    logic [12:0]  AlignCnt;                      // addend shift distance (two's complement)
    logic [213:0] ZManPreShifted;                // alignment shifter input
    logic [213:0] ZManShifted;                   // alignment shifter output, sticky region included

    ///////////////////////////////////////////////////////////////////////////
    // Operand unpacking
    //   Double precision uses the whole 64-bit word. Single precision is read
    //   from the upper 32 bits: sign [63], exponent [62:55], fraction [54:32],
    //   with the fraction padded on the right to 52 bits.
    ///////////////////////////////////////////////////////////////////////////

    // FOpCtrlE[2] (fmul) replaces Z with zero
    assign Addend = FOpCtrlE[2] ? 64'b0 : Z;

    assign Bias = FmtE ? 13'h3ff : 13'h7f;

    // signs
    assign XSgn = X[63];
    assign YSgn = Y[63];
    assign ZSgn = Addend[63];

    // exponents
    assign XExp = FmtE ? {2'b0, X[62:52]}      : {5'b0, X[62:55]};
    assign YExp = FmtE ? {2'b0, Y[62:52]}      : {5'b0, Y[62:55]};
    assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};

    // fractions
    assign XFrac = FmtE ? X[51:0]      : {X[54:32], 29'b0};
    assign YFrac = FmtE ? Y[51:0]      : {Y[54:32], 29'b0};
    assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};

    // mantissas: the leading bit is 1 only when the exponent field is non-zero
    assign XMan = {~XExpZero, XFrac};
    assign YMan = {~YExpZero, YFrac};
    assign ZMan = {~ZExpZero, ZFrac};

    ///////////////////////////////////////////////////////////////////////////
    // Special-value classification
    ///////////////////////////////////////////////////////////////////////////

    assign XExpZero  = ~|XExp;
    assign YExpZero  = ~|YExp;
    assign ZExpZero  = ~|ZExp;

    assign XExpMax   = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign YExpMax   = FmtE ? &YExp[10:0] : &YExp[7:0];
    assign ZExpMax   = FmtE ? &ZExp[10:0] : &ZExp[7:0];

    assign XFracZero = ~|XFrac;
    assign YFracZero = ~|YFrac;
    assign ZFracZero = ~|ZFrac;

    // exponent all zeros: zero or denormal
    assign XZeroE  = XExpZero & XFracZero;
    assign YZeroE  = YExpZero & YFracZero;
    assign ZZeroE  = ZExpZero & ZFracZero;
    assign XDenorm = XExpZero & ~XFracZero;
    assign YDenorm = YExpZero & ~YFracZero;
    assign ZDenorm = ZExpZero & ~ZFracZero;

    // exponent all ones: infinity or NaN
    assign XInfE = XExpMax & XFracZero;
    assign YInfE = YExpMax & YFracZero;
    assign ZInfE = ZExpMax & ZFracZero;
    assign XNaNE = XExpMax & ~XFracZero;
    assign YNaNE = YExpMax & ~YFracZero;
    assign ZNaNE = ZExpMax & ~ZFracZero;

    ///////////////////////////////////////////////////////////////////////////
    // Product and alignment count
    //   - The product exponent is the sum of the input exponents with one
    //     bias removed (each input exponent carries its own bias).
    //   - A denormal is stored with exponent 0 but behaves as exponent 1, so
    //     one is added for every denormal multiplicand.
    //   - The product exponent is forced to 0 when X or Y is zero.
    //   - AlignCnt is the distance between the product exponent and Z's
    //     effective exponent: negative when Z is larger (Z shifts left),
    //     positive when the product is larger (Z shifts right).
    ///////////////////////////////////////////////////////////////////////////

    // verilator lint_off WIDTH
    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
                      XExp + YExp - Bias + XDenorm + YDenorm;

    // mantissa product; zero and denormal inputs contribute no leading one
    assign ProdManE = XMan * YMan;

    assign AlignCnt = ProdExpE - ZExp - ZDenorm;
    // verilator lint_on WIDTH

    ///////////////////////////////////////////////////////////////////////////
    // Alignment shifter
    //   The unshifted addend has 55 zeros above ZMan (room to shift left) and
    //   106 zeros below it (room to shift right). The low 52 bits of the
    //   shifted value are used only for the sticky bit; the upper 162 bits
    //   are the aligned addend.
    ///////////////////////////////////////////////////////////////////////////

    assign ZManPreShifted = {55'b0, ZMan, 106'b0};

    always_comb begin
        // Default: AlignCnt > 106, i.e. the addend lies entirely below the
        // product and is represented by a sticky bit alone.
        KillProdE     = 1'b0;
        ZManShifted   = '0;
        AddendStickyE = ~ZZeroE;

        if ($signed(AlignCnt) <= $signed(-13'd56)) begin
            // Product too small to affect the sum: drop it, leave the addend
            // unshifted, and record a non-zero product as sticky.
            KillProdE     = 1'b1;
            ZManShifted   = ZManPreShifted;
            AddendStickyE = ~(XZeroE|YZeroE);
        end else if ($signed(AlignCnt) <= $signed(13'd0)) begin
            // Addend is at least as large as the product: shift it left.
            ZManShifted   = ZManPreShifted << -AlignCnt;
            AddendStickyE = |(ZManShifted[51:0]);
        end else if ($signed(AlignCnt) <= $signed(13'd106)) begin
            // Product is larger: shift the addend right; bits that land in
            // the low 52 positions are folded into the sticky bit.
            ZManShifted   = ZManPreShifted >> AlignCnt;
            AddendStickyE = |(ZManShifted[51:0]);
        end
    end

    assign AlignedAddendE = ZManShifted[213:52];

endmodule
module fma2(

View File

@ -1,184 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
// fma1
//   First (execute) stage of the fused multiply-add unit. Purely
//   combinational: it
//     - unpacks X, Y and Z into sign / exponent / fraction,
//     - classifies each operand (zero, infinity, NaN),
//     - computes the product exponent and the 106-bit mantissa product,
//     - shifts the addend mantissa so it lines up with the product.
///////////////////////////////////////////////////////////////////////////////
module fma1(
    input  logic [63:0]  X,                      // X
    input  logic [63:0]  Y,                      // Y
    input  logic [63:0]  Z,                      // Z
    input  logic [2:0]   FOpCtrlE,               // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
    input  logic         FmtE,                   // precision 1 = double 0 = single
    output logic [105:0] ProdManE,               // XMan * YMan
    output logic [161:0] AlignedAddendE,         // Z mantissa after the alignment shift
    output logic [12:0]  ProdExpE,               // X exponent + Y exponent - bias
    output logic         AddendStickyE,          // sticky bit produced by the alignment step
    output logic         KillProdE,              // product is too small to matter; treat it as zero
    output logic         XZeroE, YZeroE, ZZeroE, // operand is zero
    output logic         XInfE, YInfE, ZInfE,    // operand is infinity
    output logic         XNaNE, YNaNE, ZNaNE);   // operand is NaN

    logic [63:0]  Addend;                        // Z, or zero when FOpCtrlE[2] is set
    logic         XSgn, YSgn, ZSgn;              // sign bits
    logic [12:0]  XExp, YExp, ZExp;              // zero-extended biased exponents
    logic [51:0]  XFrac, YFrac, ZFrac;           // fraction fields
    logic [52:0]  XMan, YMan, ZMan;              // fraction with the leading bit prepended
    logic [12:0]  Bias;                          // 1023 for double, 127 for single
    logic         XExpZero, YExpZero, ZExpZero;  // exponent field is all zeros
    logic         XExpMax, YExpMax, ZExpMax;     // exponent field is all ones
    logic         XFracZero, YFracZero, ZFracZero; // fraction field is all zeros
    logic         XDenorm, YDenorm, ZDenorm;     // operand is denormal
    logic [12:0]  AlignCnt;                      // addend shift distance (two's complement)
    logic [213:0] ZManPreShifted;                // alignment shifter input
    logic [213:0] ZManShifted;                   // alignment shifter output, sticky region included

    ///////////////////////////////////////////////////////////////////////////
    // Operand unpacking
    //   Double precision uses the whole 64-bit word. Single precision is read
    //   from the upper 32 bits: sign [63], exponent [62:55], fraction [54:32],
    //   with the fraction padded on the right to 52 bits.
    ///////////////////////////////////////////////////////////////////////////

    // FOpCtrlE[2] (fmul) replaces Z with zero
    assign Addend = FOpCtrlE[2] ? 64'b0 : Z;

    assign Bias = FmtE ? 13'h3ff : 13'h7f;

    // signs
    assign XSgn = X[63];
    assign YSgn = Y[63];
    assign ZSgn = Addend[63];

    // exponents
    assign XExp = FmtE ? {2'b0, X[62:52]}      : {5'b0, X[62:55]};
    assign YExp = FmtE ? {2'b0, Y[62:52]}      : {5'b0, Y[62:55]};
    assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};

    // fractions
    assign XFrac = FmtE ? X[51:0]      : {X[54:32], 29'b0};
    assign YFrac = FmtE ? Y[51:0]      : {Y[54:32], 29'b0};
    assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};

    // mantissas: the leading bit is 1 only when the exponent field is non-zero
    assign XMan = {~XExpZero, XFrac};
    assign YMan = {~YExpZero, YFrac};
    assign ZMan = {~ZExpZero, ZFrac};

    ///////////////////////////////////////////////////////////////////////////
    // Special-value classification
    ///////////////////////////////////////////////////////////////////////////

    assign XExpZero  = ~|XExp;
    assign YExpZero  = ~|YExp;
    assign ZExpZero  = ~|ZExp;

    assign XExpMax   = FmtE ? &XExp[10:0] : &XExp[7:0];
    assign YExpMax   = FmtE ? &YExp[10:0] : &YExp[7:0];
    assign ZExpMax   = FmtE ? &ZExp[10:0] : &ZExp[7:0];

    assign XFracZero = ~|XFrac;
    assign YFracZero = ~|YFrac;
    assign ZFracZero = ~|ZFrac;

    // exponent all zeros: zero or denormal
    assign XZeroE  = XExpZero & XFracZero;
    assign YZeroE  = YExpZero & YFracZero;
    assign ZZeroE  = ZExpZero & ZFracZero;
    assign XDenorm = XExpZero & ~XFracZero;
    assign YDenorm = YExpZero & ~YFracZero;
    assign ZDenorm = ZExpZero & ~ZFracZero;

    // exponent all ones: infinity or NaN
    assign XInfE = XExpMax & XFracZero;
    assign YInfE = YExpMax & YFracZero;
    assign ZInfE = ZExpMax & ZFracZero;
    assign XNaNE = XExpMax & ~XFracZero;
    assign YNaNE = YExpMax & ~YFracZero;
    assign ZNaNE = ZExpMax & ~ZFracZero;

    ///////////////////////////////////////////////////////////////////////////
    // Product and alignment count
    //   - The product exponent is the sum of the input exponents with one
    //     bias removed (each input exponent carries its own bias).
    //   - A denormal is stored with exponent 0 but behaves as exponent 1, so
    //     one is added for every denormal multiplicand.
    //   - The product exponent is forced to 0 when X or Y is zero.
    //   - AlignCnt is the distance between the product exponent and Z's
    //     effective exponent: negative when Z is larger (Z shifts left),
    //     positive when the product is larger (Z shifts right).
    ///////////////////////////////////////////////////////////////////////////

    // verilator lint_off WIDTH
    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
                      XExp + YExp - Bias + XDenorm + YDenorm;

    // mantissa product; zero and denormal inputs contribute no leading one
    assign ProdManE = XMan * YMan;

    assign AlignCnt = ProdExpE - ZExp - ZDenorm;
    // verilator lint_on WIDTH

    ///////////////////////////////////////////////////////////////////////////
    // Alignment shifter
    //   The unshifted addend has 55 zeros above ZMan (room to shift left) and
    //   106 zeros below it (room to shift right). The low 52 bits of the
    //   shifted value are used only for the sticky bit; the upper 162 bits
    //   are the aligned addend.
    ///////////////////////////////////////////////////////////////////////////

    assign ZManPreShifted = {55'b0, ZMan, 106'b0};

    always_comb begin
        // Default: AlignCnt > 106, i.e. the addend lies entirely below the
        // product and is represented by a sticky bit alone.
        KillProdE     = 1'b0;
        ZManShifted   = '0;
        AddendStickyE = ~ZZeroE;

        if ($signed(AlignCnt) <= $signed(-13'd56)) begin
            // Product too small to affect the sum: drop it, leave the addend
            // unshifted, and record a non-zero product as sticky.
            KillProdE     = 1'b1;
            ZManShifted   = ZManPreShifted;
            AddendStickyE = ~(XZeroE|YZeroE);
        end else if ($signed(AlignCnt) <= $signed(13'd0)) begin
            // Addend is at least as large as the product: shift it left.
            ZManShifted   = ZManPreShifted << -AlignCnt;
            AddendStickyE = |(ZManShifted[51:0]);
        end else if ($signed(AlignCnt) <= $signed(13'd106)) begin
            // Product is larger: shift the addend right; bits that land in
            // the low 52 positions are folded into the sticky bit.
            ZManShifted   = ZManPreShifted >> AlignCnt;
            AddendStickyE = |(ZManShifted[51:0]);
        end
    end

    assign AlignedAddendE = ZManShifted[213:52];

endmodule

View File

@ -45,7 +45,7 @@ module fpu (
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
generate
if (`F_SUPPORTED) begin
if (`F_SUPPORTED | `D_SUPPORTED) begin
// control logic signal instantiation
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
@ -75,39 +75,15 @@ module fpu (
logic [63:0] DivInput1E, DivInput2E;
logic HoldInputs; // keep forwarded inputs arround durring division
// FMA signals
logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units
logic [161:0] AlignedAddendE, AlignedAddendM;
logic [12:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
//fpu signals
logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW;
// add/cvt signals
logic [63:0] AddSumE, AddSumM;
logic [63:0] AddSumTcE, AddSumTcM;
logic [3:0] AddSelInvE, AddSelInvM;
logic [10:0] AddExpPostSumE,AddExpPostSumM;
logic AddCorrSignE, AddCorrSignM;
logic AddOp1NormE, AddOp1NormM;
logic AddOp2NormE, AddOp2NormM;
logic AddOpANormE, AddOpANormM;
logic AddOpBNormE, AddOpBNormM;
logic AddInvalidE, AddInvalidM;
logic AddDenormInE, AddDenormInM;
logic AddSwapE, AddSwapM;
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
logic AddSignAE, AddSignAM;
logic AddConvertE, AddConvertM;
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentE, AddExponentM;
logic [63:0] FAddResM, FAddResW;
logic [4:0] FAddFlgM, FAddFlgW;
logic [63:0] CvtResE, CvtResM;
logic [4:0] CvtFlgE, CvtFlgM;
// cmp signals
logic CmpNVE, CmpNVM, CmpNVW;
@ -117,7 +93,7 @@ module fpu (
logic [63:0] SgnResE, SgnResM;
logic SgnNVE, SgnNVM, SgnNVW;
logic [63:0] FResM, FResW;
logic FFlgM, FFlgW;
logic [4:0] FFlgM, FFlgW;
// instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM;
@ -198,9 +174,10 @@ module fpu (
// first of two-stage instance of floating-point fused multiply-add unit
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE,
.ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XNaNE, .YNaNE, .ZNaNE );
fma fma (.clk, .reset, .FlushM, .StallM,
.SrcXE, .SrcYE, .SrcZE, .SrcXM, .SrcYM, .SrcZM,
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
// first and only instance of floating-point divider
logic fpdivClk;
@ -225,10 +202,8 @@ module fpu (
// first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
.SrcXE, .SrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
// first and only instance of floating-point comparator
fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE);
@ -239,6 +214,9 @@ module fpu (
// first and only instance of floating-point classify unit
fclassify fclassify (.SrcXE, .FmtE, .ClassResE);
fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
// output for store instructions
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
//***swap to mux
@ -259,31 +237,16 @@ module fpu (
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE},
{AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM});
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
{FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
@ -299,29 +262,27 @@ module fpu (
//BEGIN MEMORY STAGE
mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM);
mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM);
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
//***change to mux
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// second instance of two-stage FMA unit
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM,
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM,
.FMAResM, .FMAFlgM);
// second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
.AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM,
.AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
// Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
always_comb begin
case (FResultSelM)
3'b000 : SetFflagsM = 5'b0;
3'b001 : SetFflagsM = FMAFlgM;
3'b010 : SetFflagsM = FAddFlgM;
3'b011 : SetFflagsM = FDivSqrtFlgM;
3'b100 : SetFflagsM = FFlgM;
default : SetFflagsM = 5'bxxxxx;
endcase
end
@ -334,19 +295,14 @@ module fpu (
// M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW);
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW);
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
@ -363,20 +319,6 @@ module fpu (
//***turn into muxs
always_comb begin
case (FResultSelW)
3'b000 : FPUFlagsW = 5'b0;
3'b001 : FPUFlagsW = FMAFlgW;
3'b010 : FPUFlagsW = FAddFlgW;
3'b011 : FPUFlagsW = FDivSqrtFlgW;
3'b100 : FPUFlagsW = {4'b0,FFlgW};
default : FPUFlagsW = 5'bxxxxx;
endcase
end
always_comb begin
case (FResultSelW)
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
@ -393,13 +335,11 @@ module fpu (
// floating-point results
//
// define offsets for LSB zero extension or truncation
always_comb begin
// zero extension
//***turn into mux
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
//*** put into mem stage
SetFflagsM = FPUFlagsW;
end
always_comb begin
// zero extension
//***turn into mux
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
end
end else begin // no F_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;

View File

@ -1,198 +0,0 @@
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
// or if (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 7: Round the result.
// Step 8: Put sum onto output.
//
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
input logic [63:0] SrcXE; // 1st input operand (A)
input logic [63:0] SrcYE; // 2nd input operand (B)
input logic [3:0] FOpCtrlE; // Function opcode
input logic FmtE; // Result Precision (1 for double, 0 for single)
wire P;
assign P = ~FmtE | FOpCtrlE[2];
wire [63:0] IntValue;
wire [11:0] exp1, exp2;
wire [11:0] exp_diff1, exp_diff2;
wire [11:0] exp_shift;
wire [51:0] mantissaA;
wire [56:0] mantissaA1;
wire [63:0] mantissaA3;
wire [51:0] mantissaB;
wire [56:0] mantissaB1, mantissaB2;
wire [63:0] mantissaB3;
wire exp_gt63;
wire Sticky_out;
wire sub;
wire zeroB;
wire [5:0] align_shift;
output logic [63:0] AddFloat1E;
output logic [63:0] AddFloat2E;
output logic [10:0] AddExponentE;
output logic [10:0] AddExpPostSumE;
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
output logic [63:0] AddSumE, AddSumTcE;
output logic [3:0] AddSelInvE;
output logic AddCorrSignE;
output logic AddSignAE;
output logic AddOp1NormE, AddOp2NormE;
output logic AddOpANormE, AddOpBNormE;
output logic AddInvalidE;
output logic AddDenormInE;
// output logic exp_valid;
output logic AddConvertE;
output logic AddSwapE;
output logic AddNormOvflowE;
wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB;
wire ZV_mantissaA;
wire ZV_mantissaB;
// Convert the input operands to their appropriate forms based on
// the orignal operands, the FOpCtrlE , and their precision P.
// Single precision inputs are converted to double precision
// and the sign of the first operand is set appropratiately based on
// if the operation is absolute value or negation.
convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "AddSelInvE" is used in
// the third pipeline stage to select the result. Also, AddOp1NormE
// and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
// sub is one if the effective operation is subtaction.
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
AddFloat1E, AddFloat2E, FOpCtrlE);
// Perform Exponent Subtraction (used for alignment). For performance
// both exponent subtractions are performed in parallel. This was
// changed to a behavior level to allow the tools to try to optimize
// the two parallel additions. The input values are zero-extended to 12
// bits prior to performing the addition.
assign exp1 = {1'b0, AddFloat1E[62:52]};
assign exp2 = {1'b0, AddFloat2E[62:52]};
assign exp_diff1 = exp1 - exp2;
assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
// The second operand (B) should be set to zero, if FOpCtrlE does not
// specify addition or subtraction
assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
// Swapped operands if zeroB is not one and exp1 < exp2.
// Swapping causes exp2 to be used for the result exponent.
// Only the exponent of the larger operand is used to determine
// the final result.
assign AddSwapE = exp_diff1[11] & ~zeroB;
assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
// zero; otherwise, it is one.
// modified to 52 bits to detect leading zeroes on denormalized mantissas
lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
// Determine the alignment shift and limit it to 63. If any bit from
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
| exp_shift[8] | exp_shift[7] | exp_shift[6];
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
// 001.M[51]M[50] ... M[1]M[0]00
// Unless the number has an exponent of zero, in which case it
// is unpacked as
// 000.00 ... 00
// This effectively flushes denormalized values to zero.
// The three bits to the left of the binary point prevent overflow
// and loss of sign information. The two bits to the right of the
// original mantissa form the "guard" and "round" bits that are used
// to round the result.
assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
// Perform mantissa alignment using a 57-bit barrel shifter
// If any of the bits shifted out are one, Sticky_out is set.
// The size of the barrel shifter could be reduced by two bits
// by not adding the leading two zeros until after the shift.
barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);
// Place either the sign-extended 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion)
assign IntValue [31:0] = SrcXE[31:0];
assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to
// IntVal and the prenormalized exponent is set to 1084. Otherwise,
// mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
// and the exponent value is left unchanged.
// Under denormalized cases, the exponent before the rounder is set to 1
// if the normal shift value is 11.
assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];
assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
// zeros.
assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
// The sign of the result needs to be corrected if the true
// operation is subtraction and the input operands were swapped.
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
// Finds normal underflow result to determine whether to round final exponent down
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
endmodule // fpadd

View File

@ -1,163 +0,0 @@
//
// File name : fpadd
// Title : Floating-Point Adder/Subtractor
// project : FPU
// Library : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Compare exponents. Swap the operands if exp1 < exp2
// or if (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
// and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 7: Round the result.
// Step 8: Put the sum onto the output.
//
// fpuaddcvt2: back half of the floating-point add/subtract/convert datapath.
//
// Takes the already-computed mantissa sum (AddSumM) and its reverse
// difference (AddSumTcM) together with the operand/exception side-band
// signals, then:
//   1. picks the pre-rounding exponent (exp_pre),
//   2. selects the magnitude of the sum (sum_corr) and the result sign,
//   3. counts leading zeros and left-shifts to normalize,
//   4. hands everything to the rounder, which produces the packed result
//      and the exception flags.
//
// Outputs:
//   FAddResM - 64-bit result from the rounder
//   FAddFlgM - 5-bit exception flags from the rounder
//
// All logic in this module is combinational.
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);

   // ---------------------------------------------------------------------
   // Ports
   // ---------------------------------------------------------------------
   input [2:0]   FrmM;                           // Rounding mode - specify values
   input [3:0]   FOpCtrlM;                       // Function opcode
   input         FmtM;                           // Result Precision (0 for double, 1 for single)
   input [63:0]  AddSumM, AddSumTcM;             // Mantissa sum and reverse difference
   input [63:0]  AddFloat1M;                     // First operand
   input [63:0]  AddFloat2M;                     // Second operand
   input [11:0]  AddExp1DenormM, AddExp2DenormM; // Exponents adjusted for denormal inputs
   input [10:0]  AddExponentM, AddExpPostSumM;   // Exponent of the larger operand
   input [3:0]   AddSelInvM;                     // Forwarded unchanged to the rounder
   input         AddOp1NormM, AddOp2NormM;       // Operand 1/2 normal indicators (forwarded to the rounder)
   input         AddOpANormM, AddOpBNormM;       // Same indicators after the operand swap
   input         AddInvalidM;                    // Invalid-operation indication
   input         AddDenormInM;                   // Denormalized-input indication
   input         AddSignAM;                      // Sign of operand A (after swap)
   input         AddCorrSignM;                   // Sign correction for swapped subtract
   input         AddConvertM;                    // Operation is a conversion
   input         AddSwapM;                       // Operands were swapped

   output [63:0] FAddResM;                       // Result of operation
   output [4:0]  FAddFlgM;                       // IEEE exception flags

   // ---------------------------------------------------------------------
   // Internal signals
   // ---------------------------------------------------------------------
   wire          AddDenormM;          // Denormal on input or output (from rounder; not an output port)
   wire          P;                   // Precision select passed to the rounder
   wire [10:0]   exp_pre;             // Exponent before rounding
   wire [63:0]   Result;              // Rounder result
   wire [63:0]   sum_corr;            // Selected (magnitude) sum
   wire [63:0]   sum_norm;            // sum_corr after the normalization shift
   wire [63:0]   sum_norm_w_bypass;   // sum_norm, or bypass path when FOpCtrlM[3] is set
   wire [5:0]    norm_shift;          // Leading-zero count of sum_corr
   wire [5:0]    norm_shift_denorm;   // Shift amount actually applied
   wire          exp_valid;           // Valid output of the leading-zero detector
   wire          DenormIO;            // Denormal indication from the rounder
   wire [4:0]    FlagsIn;             // Exception flags from the rounder
   wire          sign_corr;           // Sign of the result
   wire          signs_equal;         // Operand sign bits are equal
   wire          mantissa_comp;
   wire          mantissa_comp_sum;
   wire          mantissa_comp_sum_tc;
   wire          Float1_sum_comp;
   wire          Float2_sum_comp;
   wire          Float1_sum_tc_comp;
   wire          Float2_sum_tc_comp;
   wire          normal_underflow;
   wire          denorm_mixed_sel;    // Denormal-input case that inverts the sum/complement choice
   logic         AddNormOvflowM;
   logic         AddOvEnM;            // Overflow trap enabled
   logic         AddUnEnM;            // Underflow trap enabled

   // Trap enables are not module inputs; they are tied high here.
   assign AddOvEnM = 1'b1;
   assign AddUnEnM = 1'b1;

   assign P = ~FmtM | FOpCtrlM[2];

   assign signs_equal = AddFloat1M[63] ~^ AddFloat2M[63];

   // ---------------------------------------------------------------------
   // Exponent value pre-rounding, with considerations for denormalized
   // and conversion cases:
   //   - denormal input: 1 when the normalization shift is exactly 11,
   //     otherwise the denormal-adjusted exponent of the operand selected
   //     by AddSwapM;
   //   - conversion: fixed exponent 1084;
   //   - otherwise: the exponent of the larger operand.
   // ---------------------------------------------------------------------
   assign exp_pre = AddDenormInM
                    ? ((norm_shift == 6'd11) ? 11'd1
                       : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
                    : (AddConvertM ? 11'd1084 : AddExponentM);

   // ---------------------------------------------------------------------
   // Normal underflow detection, used to decide whether the final exponent
   // is rounded down. Each operand fraction is compared against the low 52
   // bits of both adder outputs; the flag is 1 when the operand fraction is
   // NOT greater than the sum fraction.
   // ---------------------------------------------------------------------
   assign Float1_sum_comp    = ~(AddFloat1M[51:0] > AddSumM[51:0]);
   assign Float2_sum_comp    = ~(AddFloat2M[51:0] > AddSumM[51:0]);
   assign Float1_sum_tc_comp = ~(AddFloat1M[51:0] > AddSumTcM[51:0]);
   assign Float2_sum_tc_comp = ~(AddFloat2M[51:0] > AddSumTcM[51:0]);

   // Pick the comparison that matches the operand selected by AddSwapM.
   assign mantissa_comp_sum    = AddSwapM ? Float2_sum_comp    : Float1_sum_comp;
   assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;

   // Pick sum vs. complement comparison from the operation bit and the
   // sign of the raw sum.
   assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc
                                                     : mantissa_comp_sum;

   // The underflow bit is only meaningful when the operand sign bits are
   // equal and at least one operand is normalized.
   // NOTE(review): the original comment read "if the signs are different",
   // but the XNOR below selects EQUAL sign bits - confirm the intent.
   assign normal_underflow = (signs_equal & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;

   // ---------------------------------------------------------------------
   // Sign and magnitude selection
   // ---------------------------------------------------------------------
   // Determine the correct sign of the result.
   assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63];

   // Denormal-input case with at least one normalized operand where the
   // sign bits are equal and FOpCtrlM[0] is set, or differ and FOpCtrlM[0]
   // is clear.
   assign denorm_mixed_sel = AddDenormInM & (AddOpANormM | AddOpBNormM)
                             & ((signs_equal & FOpCtrlM[0]) | (~signs_equal & ~FOpCtrlM[0]));

   // If the sum is negative, use its two's complement instead. This value
   // has to be 64 bits to correctly handle the case 10...00. In the
   // denorm_mixed_sel case the choice is inverted, and when FOpCtrlM[3] is
   // set the raw sum is passed through.
   assign sum_corr = denorm_mixed_sel
                     ? (AddSumM[63] ? AddSumM : AddSumTcM)
                     : (FOpCtrlM[3] ? AddSumM
                        : (AddSumM[63] ? AddSumTcM : AddSumM));

   // Forced to 1 for a denormal-input case whose sum is exactly zero;
   // otherwise bit 52 of whichever adder output holds the magnitude.
   // KEP: used to be (AddSumM == 16'h0); the intended width is unconfirmed.
   assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0])
                           ? 1'b1
                           : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);

   // ---------------------------------------------------------------------
   // Normalization
   // ---------------------------------------------------------------------
   // Leading-zero detector: determines the size of the shift needed for
   // normalization. If sum_corr is all zeros, exp_valid is zero; otherwise
   // it is one.
   lz64 lzd1 (norm_shift, exp_valid, sum_corr);

   // No shift when both inputs are denormal, or on normal underflow.
   assign norm_shift_denorm = (AddDenormInM & ((~AddOpANormM & ~AddOpBNormM) | normal_underflow))
                              ? 6'h00 : norm_shift;

   // Barrel shifter used for normalization. It takes the corrected sum and
   // the amount by which it should be LEFT shifted, and outputs the
   // normalized sum.
   barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);

   // When FOpCtrlM[3] is set the shifter is bypassed; FOpCtrlM[0] then
   // selects the bitwise inverse of sum_corr.
   assign sum_norm_w_bypass = FOpCtrlM[3] ? (FOpCtrlM[0] ? ~sum_corr : sum_corr)
                                          : sum_norm;

   // ---------------------------------------------------------------------
   // Rounding
   // ---------------------------------------------------------------------
   // Round the mantissa to a 52-bit value, with the leading one removed.
   // If the result is a single precision number, the actual mantissa is in
   // the upper 23 bits and the lower 29 bits are zero. Normalization has
   // already been performed, so the rounding point is known exactly. The
   // rounder also handles special cases and sets the exception flags.
   // Ports are connected by position; keep the order in sync with the
   // rounder module declaration.
   rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
                   AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
                   AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
                   AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);

   // Drive the final result and the exception flags.
   assign FAddResM = Result;
   assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};

endmodule // fpuaddcvt2

View File

@ -94,14 +94,14 @@ string tests32f[] = '{
"rv64f/I-FSW-01", "2000",
"rv64f/I-FCLASS-S-01", "2000",
"rv64f/I-FADD-S-01", "2000",
// "rv64f/I-FCVT-S-L-01", "2000",
// "rv64f/I-FCVT-S-LU-01", "2000",
// "rv64f/I-FCVT-S-W-01", "2000",
// "rv64f/I-FCVT-S-WU-01", "2000",
// "rv64f/I-FCVT-L-S-01", "2000",
// "rv64f/I-FCVT-LU-S-01", "2000",
// "rv64f/I-FCVT-W-S-01", "2000",
// "rv64f/I-FCVT-WU-S-01", "2000",
"rv64f/I-FCVT-S-L-01", "2000",
"rv64f/I-FCVT-S-LU-01", "2000",
"rv64f/I-FCVT-S-W-01", "2000",
"rv64f/I-FCVT-S-WU-01", "2000",
"rv64f/I-FCVT-L-S-01", "2000",
"rv64f/I-FCVT-LU-S-01", "2000",
"rv64f/I-FCVT-W-S-01", "2000",
"rv64f/I-FCVT-WU-S-01", "2000",
// "rv64f/I-FDIV-S-01", "2000",
"rv64f/I-FEQ-S-01", "2000",
"rv64f/I-FLE-S-01", "2000",
@ -122,6 +122,16 @@ string tests32f[] = '{
string tests64d[] = '{
// "rv64d/I-FDIV-D-01", "2000",
"rv64d/I-FCVT-D-L-01", "2000",
"rv64d/I-FCVT-D-LU-01", "2000",
// "rv64d/I-FCVT-D-S-01", "2000", //the number to be converted is in the lower 32 bits need to change the test
"rv64d/I-FCVT-D-W-01", "2000",
"rv64d/I-FCVT-D-WU-01", "2000",
"rv64d/I-FCVT-L-D-01", "2000",
"rv64d/I-FCVT-LU-D-01", "2000",
// "rv64d/I-FCVT-S-D-01", "2000", //the result is in the lower 32 bits needs to be changed in the imperas test
"rv64d/I-FCVT-W-D-01", "2000",
// "rv64d/I-FCVT-WU-D-01", "2000", //this test needs to be fixed it expects 2^64-1 rather then 2^32-1 (specified in spec)
"rv64d/I-FSD-01", "2000",
"rv64d/I-FLD-01", "2420",
"rv64d/I-FNMADD-D-01", "2000",
@ -134,16 +144,6 @@ string tests32f[] = '{
"rv64d/I-FEQ-D-01", "2000",
"rv64d/I-FADD-D-01", "2000",
"rv64d/I-FCLASS-D-01", "2000",
// "rv64d/I-FCVT-D-L-01", "2000",
// "rv64d/I-FCVT-D-LU-01", "2000",
// "rv64d/I-FCVT-D-S-01", "2000",
// "rv64d/I-FCVT-D-W-01", "2000",
// "rv64d/I-FCVT-D-WU-01", "2000",
// "rv64d/I-FCVT-L-D-01", "2000",
// "rv64d/I-FCVT-LU-D-01", "2000",
// "rv64d/I-FCVT-S-D-01", "2000",
// "rv64d/I-FCVT-W-D-01", "2000",
// "rv64d/I-FCVT-WU-D-01", "2000",
"rv64d/I-FMADD-D-01", "2000",
"rv64d/I-FMUL-D-01", "2000",
"rv64d/I-FMV-D-X-01", "2000",
@ -898,8 +898,22 @@ module instrNameDecTB(
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
else if (funct7 == 7'b1110000 && rs2 == 5'b00000) name = "FMV.X.W";
else if (funct7 == 7'b1111000 && rs2 == 5'b00000) name = "FMV.W.X";
else if (funct7 == 7'b1110001 && rs2 == 5'b00000) name = "FMV.X.D"; // DOUBLE
@ -915,22 +929,50 @@ module instrNameDecTB(
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
else if (funct7[6:2] == 5'b00100) name = "FSGNJN";
else if (funct7[6:2] == 5'b00101) name = "FMAX";
else if (funct7[6:2] == 5'b10100) name = "FLT";
else if (funct7[6:2] == 5'b11100) name = "FCLASS";
else name = "ILLEGAL";
10'b0101111_010: if (funct7[6:2] == 5'b00000) name = "FADD";
10'b1010011_010: if (funct7[6:2] == 5'b00000) name = "FADD";
else if (funct7[6:2] == 5'b00001) name = "FSUB";
else if (funct7[6:2] == 5'b00010) name = "FMUL";
else if (funct7[6:2] == 5'b00011) name = "FDIV";
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
else if (funct7[6:2] == 5'b00100) name = "FSGNJX";
else if (funct7[6:2] == 5'b10100) name = "FEQ";
else name = "ILLEGAL";
@ -941,8 +983,22 @@ module instrNameDecTB(
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00010) name = "FCVT.L.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00011) name = "FCVT.LU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else if (funct7 == 7'b1101000 && rs2 == 5'b00010) name = "FCVT.S.L";
else if (funct7 == 7'b1101000 && rs2 == 5'b00011) name = "FCVT.S.LU";
else if (funct7 == 7'b1100001 && rs2 == 5'b00000) name = "FCVT.W.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00001) name = "FCVT.WU.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00010) name = "FCVT.L.D";
else if (funct7 == 7'b1100001 && rs2 == 5'b00011) name = "FCVT.LU.D";
else if (funct7 == 7'b1101001 && rs2 == 5'b00000) name = "FCVT.D.W";
else if (funct7 == 7'b1101001 && rs2 == 5'b00001) name = "FCVT.D.WU";
else if (funct7 == 7'b1101001 && rs2 == 5'b00010) name = "FCVT.D.L";
else if (funct7 == 7'b1101001 && rs2 == 5'b00011) name = "FCVT.D.LU";
else if (funct7 == 7'b0100000 && rs2 == 5'b00001) name = "FCVT.S.D";
else if (funct7 == 7'b0100001 && rs2 == 5'b00000) name = "FCVT.D.S";
else name = "ILLEGAL";
10'b0000111_010: name = "FLW";
10'b0100111_010: name = "FSW";