forked from Github_Repos/cvw
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main
This commit is contained in:
commit
2368b58cc9
@ -15,16 +15,16 @@ module add(rM, sM, tM, sum,
|
||||
negsum, invz, selsum1, negsum0, negsum1, killprodM);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [105:0] rM; // partial product 1
|
||||
input [105:0] sM; // partial product 2
|
||||
input [163:0] tM; // aligned addend
|
||||
input invz; // invert addend
|
||||
input selsum1; // select +1 mode of compound adder
|
||||
input killprodM; // z >> product
|
||||
input negsum; // Negate sum
|
||||
output [163:0] sum; // sum
|
||||
output negsum0; // sum was negative in +0 mode
|
||||
output negsum1; // sum was negative in +1 mode
|
||||
input logic [105:0] rM; // partial product 1
|
||||
input logic [105:0] sM; // partial product 2
|
||||
input logic [163:0] tM; // aligned addend
|
||||
input logic invz; // invert addend
|
||||
input logic selsum1; // select +1 mode of compound adder
|
||||
input logic killprodM; // z >> product
|
||||
input logic negsum; // Negate sum
|
||||
output logic [163:0] sum; // sum
|
||||
output logic negsum0; // sum was negative in +0 mode
|
||||
output logic negsum1; // sum was negative in +1 mode
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -34,6 +34,7 @@ module add(rM, sM, tM, sum,
|
||||
wire [164:0] sum0; // sum of compound adder +0 mode
|
||||
wire [164:0] sum1; // sum of compound adder +1 mode
|
||||
wire [163:0] prodshifted; // sum of compound adder +1 mode
|
||||
wire [164:0] tmp; // sum of compound adder +1 mode
|
||||
|
||||
// Invert addend if z's sign is different from the product's sign
|
||||
|
||||
@ -44,11 +45,13 @@ module add(rM, sM, tM, sum,
|
||||
assign r2 = killprodM ? 106'b0 : rM;
|
||||
assign s2 = killprodM ? 106'b0 : sM;
|
||||
|
||||
//***replace this with a more structural cpa that synthesizes better
|
||||
// Compound adder
|
||||
// Consists of 3:2 CSA followed by long compound CPA
|
||||
assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
|
||||
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0;
|
||||
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above
|
||||
//assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
|
||||
//assign tmp = ({{57{r2[105]}},r2, 2'b0} + {{57{s2[105]}},s2, 2'b0});
|
||||
assign sum0 = t2 + 164'b0 + {57'b0, r2+s2, 2'b0};
|
||||
assign sum1 = t2 + 164'b1 + {57'b0, r2+s2, 2'b0}; // +1 from invert of z above
|
||||
|
||||
// Check sign bits in +0/1 modes
|
||||
assign negsum0 = sum0[164];
|
||||
@ -59,3 +62,4 @@ module add(rM, sM, tM, sum,
|
||||
assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]);
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -15,79 +15,63 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
|
||||
killprodE, sumshiftE, sumshiftzeroE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [51:0] zman; // Fraction of addend z;
|
||||
input [12:0] aligncntE; // amount to shift
|
||||
input xzeroE; // Input X = 0
|
||||
input yzeroE; // Input Y = 0
|
||||
input zzeroE; // Input Z = 0
|
||||
input zdenormE; // Input Z is denormalized
|
||||
output [163:0] tE; // aligned addend (54 bits left of bpt)
|
||||
output bsE; // sticky bit of addend
|
||||
output killprodE; // Z >> product
|
||||
output [7:0] sumshiftE;
|
||||
output sumshiftzeroE;
|
||||
input logic [51:0] zman; // Fraction of addend z;
|
||||
input logic [12:0] aligncntE; // amount to shift
|
||||
input logic xzeroE; // Input X = 0
|
||||
input logic yzeroE; // Input Y = 0
|
||||
input logic zzeroE; // Input Z = 0
|
||||
input logic zdenormE; // Input Z is denormalized
|
||||
output logic [163:0] tE; // aligned addend (54 bits left of bpt)
|
||||
output logic bsE; // sticky bit of addend
|
||||
output logic killprodE; // Z >> product
|
||||
output logic [8:0] sumshiftE;
|
||||
output logic sumshiftzeroE;
|
||||
|
||||
// Internal nodes
|
||||
|
||||
reg [163:0] tE; // aligned addend from shifter
|
||||
reg [215:0] shift; // aligned addend from shifter
|
||||
reg killprodE; // Z >> product
|
||||
reg bsE; // sticky bit of addend
|
||||
reg ps; // sticky bit of product
|
||||
reg zexpsel; // sticky bit of product
|
||||
reg [7:0] i; // temp storage for finding sticky bit
|
||||
wire [52:0] z1; // Z plus 1
|
||||
wire [51:0] z2; // Z selected after handling rounds
|
||||
wire [11:0] align104; // alignment count + 104
|
||||
logic [8:0] sumshiftE;
|
||||
logic sumshiftzeroE;
|
||||
|
||||
|
||||
|
||||
// Compute sign of aligncntE + 104 to check for shifting too far right
|
||||
|
||||
//assign align104 = aligncntE+104;
|
||||
logic [12:0] tmp;
|
||||
|
||||
// Shift addend by alignment count. Generate sticky bits from
|
||||
// addend on right shifts. Handle special cases of shifting
|
||||
// by too much.
|
||||
|
||||
always @(aligncntE or xzeroE or yzeroE or zman or zdenormE or zzeroE)
|
||||
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// Default to clearing sticky bits
|
||||
bsE = 0;
|
||||
ps = 0;
|
||||
|
||||
// And to using product as primary operand in adder I exponent gen
|
||||
killprodE = xzeroE | yzeroE;
|
||||
// d = aligncntE
|
||||
// p = 53
|
||||
if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1
|
||||
//***try reducing this hardware to use one shifter
|
||||
if ($signed(aligncntE) <= $signed(-(13'd105))) begin //d<=-2p+1
|
||||
//product anchored case with saturated shift
|
||||
sumshiftE = 163; // 3p+4
|
||||
sumshiftzeroE = 0;
|
||||
shift = {1'b1,zman,163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if($signed(aligncntE) <= $signed(2)) begin // -2p+1<d<=2
|
||||
|
||||
end else if($signed(aligncntE) <= $signed(13'd2)) begin // -2p+1<d<=2
|
||||
// product anchored or cancellation
|
||||
sumshiftE = 57-aligncntE; // p + 2 - d
|
||||
tmp = 13'd57-aligncntE;
|
||||
sumshiftE = tmp[8:0]; // p + 2 - d
|
||||
sumshiftzeroE = 0;
|
||||
shift = {~zdenormE,zman,163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if ($signed(aligncntE)<=$signed(55)) begin // 2 < d <= p+2
|
||||
|
||||
end else if ($signed(aligncntE)<=$signed(13'd55)) begin // 2 < d <= p+2
|
||||
// addend anchored case
|
||||
// used to be 56 \/ somthing doesn'tE seem right too many typos
|
||||
sumshiftE = 57-aligncntE;
|
||||
// used to be 56 \/ something doesn't seem right; too many typos
|
||||
tmp = 13'd57-aligncntE;
|
||||
sumshiftE = tmp[8:0];
|
||||
sumshiftzeroE = 0;
|
||||
shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
//zexpsel = 1;
|
||||
|
||||
end else begin // d >= p+3
|
||||
// addend anchored case with saturated shift
|
||||
sumshiftE = 0;
|
||||
@ -96,15 +80,9 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
killprodE = 1;
|
||||
//ps = 1;
|
||||
//zexpsel = 1;
|
||||
|
||||
// use some behavioral code to find sticky bit. This is really
|
||||
// done by hardware in the shifter.
|
||||
//if (aligncntE < 0)
|
||||
// for (i=0; i<-aligncntE-52; i = i+1)
|
||||
// bsE = bsE || z2[i];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -1,21 +1,19 @@
|
||||
module booth(xExt, choose, add1, e, pp);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [53:0] xExt; // multiplicand xExt
|
||||
input [2:0] choose; // bits needed to choose which encoding
|
||||
output [1:0] add1; // do you add 1
|
||||
output e;
|
||||
output [54:0] pp; // the resultant encoding
|
||||
input logic [53:0] xExt; // multiplicand xExt
|
||||
input logic [2:0] choose; // bits needed to choose which encoding
|
||||
output logic [1:0] add1; // do you add 1
|
||||
output logic e;
|
||||
output logic [54:0] pp; // the resultant encoding
|
||||
|
||||
logic [54:0] pp, temp;
|
||||
logic e;
|
||||
logic [1:0] add1;
|
||||
logic [54:0] temp;
|
||||
logic [53:0] negx;
|
||||
//logic temp;
|
||||
|
||||
assign negx = ~xExt;
|
||||
|
||||
always @(choose, xExt, negx)
|
||||
always_comb
|
||||
case (choose)
|
||||
3'b000 : pp = 55'b0; // 0
|
||||
3'b001 : pp = {1'b0, xExt}; // 1
|
||||
@ -24,10 +22,10 @@ module booth(xExt, choose, add1, e, pp);
|
||||
3'b100 : pp = {negx, 1'b0}; // -2
|
||||
3'b101 : pp = {1'b1, negx}; // -1
|
||||
3'b110 : pp = {1'b1, negx}; // -1
|
||||
3'b111 : pp = 55'hfffffffffffffff; // -0
|
||||
3'b111 : pp = '1; // -0
|
||||
endcase
|
||||
|
||||
always @(choose, xExt, negx)
|
||||
always_comb
|
||||
case (choose)
|
||||
3'b000 : e = 0; // 0
|
||||
3'b001 : e = 0; // 1
|
||||
@ -40,7 +38,7 @@ module booth(xExt, choose, add1, e, pp);
|
||||
endcase
|
||||
// assign add1 = (choose[2] == 1'b1) ? ((choose[1:0] == 2'b11) ? 1'b0 : 1'b1) : 1'b0;
|
||||
// assign add1 = choose[2];
|
||||
always @(choose)
|
||||
always_comb
|
||||
case (choose)
|
||||
3'b000 : add1 = 2'b0; // 0
|
||||
3'b001 : add1 = 2'b0; // 1
|
||||
|
@ -3,11 +3,11 @@ module add3comp2(a, b, c, carry, sum);
|
||||
//look into different implementations of the compressors?
|
||||
|
||||
parameter BITS = 4;
|
||||
input [BITS-1:0] a;
|
||||
input [BITS-1:0] b;
|
||||
input [BITS-1:0] c;
|
||||
output [BITS-1:0] carry;
|
||||
output [BITS-1:0] sum;
|
||||
input logic [BITS-1:0] a;
|
||||
input logic [BITS-1:0] b;
|
||||
input logic [BITS-1:0] c;
|
||||
output logic [BITS-1:0] carry;
|
||||
output logic [BITS-1:0] sum;
|
||||
genvar i;
|
||||
|
||||
generate
|
||||
@ -22,12 +22,12 @@ module add4comp2(a, b, c, d, carry, sum);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
parameter BITS = 4;
|
||||
input [BITS-1:0] a;
|
||||
input [BITS-1:0] b;
|
||||
input [BITS-1:0] c;
|
||||
input [BITS-1:0] d;
|
||||
output [BITS:0] carry;
|
||||
output [BITS-1:0] sum;
|
||||
input logic [BITS-1:0] a;
|
||||
input logic [BITS-1:0] b;
|
||||
input logic [BITS-1:0] c;
|
||||
input logic [BITS-1:0] d;
|
||||
output logic [BITS:0] carry;
|
||||
output logic [BITS-1:0] sum;
|
||||
|
||||
logic [BITS-1:0] cout;
|
||||
logic carryTmp;
|
||||
@ -54,11 +54,11 @@ module sng3comp2(a, b, c, carry, sum);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//look into different implementations of the compressors?
|
||||
|
||||
input a;
|
||||
input b;
|
||||
input c;
|
||||
output carry;
|
||||
output sum;
|
||||
input logic a;
|
||||
input logic b;
|
||||
input logic c;
|
||||
output logic carry;
|
||||
output logic sum;
|
||||
|
||||
logic axorb;
|
||||
|
||||
@ -73,14 +73,14 @@ module sng4comp2(a, b, c, d, cin, cout, carry, sum);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//look into pass gate 4:2 counters?
|
||||
|
||||
input a;
|
||||
input b;
|
||||
input c;
|
||||
input d;
|
||||
input cin;
|
||||
output cout;
|
||||
output carry;
|
||||
output sum;
|
||||
input logic a;
|
||||
input logic b;
|
||||
input logic c;
|
||||
input logic d;
|
||||
input logic cin;
|
||||
output logic cout;
|
||||
output logic carry;
|
||||
output logic sum;
|
||||
|
||||
logic TmpSum;
|
||||
|
||||
|
@ -20,17 +20,17 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
|
||||
aligncntE, prodof, aeE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [62:52] xexp; // Exponent of multiplicand x
|
||||
input [62:52] yexp; // Exponent of multiplicand y
|
||||
input [62:52] zexp; // Exponent of addend z
|
||||
input xdenormE; // Z is denorm
|
||||
input ydenormE; // Z is denorm
|
||||
input zdenormE; // Z is denorm
|
||||
input xzeroE; // Z is denorm
|
||||
input yzeroE; // Z is denorm
|
||||
output [12:0] aligncntE; // shift count for alignment shifter
|
||||
output prodof; // X*Y exponent out of bounds
|
||||
output [12:0] aeE; //exponent of multiply
|
||||
input logic [62:52] xexp; // Exponent of multiplicand x
|
||||
input logic [62:52] yexp; // Exponent of multiplicand y
|
||||
input logic [62:52] zexp; // Exponent of addend z
|
||||
input logic xdenormE; // Z is denorm
|
||||
input logic ydenormE; // Z is denorm
|
||||
input logic zdenormE; // Z is denorm
|
||||
input logic xzeroE; // Z is denorm
|
||||
input logic yzeroE; // Z is denorm
|
||||
output logic [12:0] aligncntE; // shift count for alignment shifter
|
||||
output logic prodof; // X*Y exponent out of bounds
|
||||
output logic [12:0] aeE; //exponent of multiply
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -50,7 +50,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
|
||||
// if exponent is out of bounds
|
||||
|
||||
|
||||
assign aeE = xzeroE|yzeroE ? 0 : xexp + yexp -1023;
|
||||
assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;
|
||||
|
||||
assign prodof = (aeE > 2046 && ~aeE[12]);
|
||||
|
||||
@ -61,7 +61,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
|
||||
// is masked by the bypass mux and two 10 bit adder delays.
|
||||
// assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
|
||||
// assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
|
||||
assign aligncntE = zexp -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
|
||||
assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
|
||||
//assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
|
||||
//assign aligncntE = zexp - aeE;// KEP use all of aeE
|
||||
|
||||
@ -87,3 +87,4 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
|
||||
// rounding mode. NaNs are propagated or generated.
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -23,24 +23,24 @@ module expgen2(xexp, yexp, zexp,
|
||||
sumof, sumuf);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [62:52] xexp; // Exponent of multiplicand x
|
||||
input [62:52] yexp; // Exponent of multiplicand y
|
||||
input [62:52] zexp; // Exponent of addend z
|
||||
input sumzero; // sum exactly equals zero
|
||||
input resultdenorm; // postnormalize rounded result
|
||||
input infinity; // generate infinity on overflow
|
||||
input [4:0] FmaFlagsM; // Result invalid
|
||||
input inf; // Some input is infinity
|
||||
input nanM; // Some input is NaN
|
||||
input [12:0] de0; // X is NaN NaN
|
||||
input xnanM; // X is NaN
|
||||
input ynanM; // Y is NaN
|
||||
input znanM; // Z is NaN
|
||||
input expplus1;
|
||||
input specialsel; // Select special result
|
||||
output [62:52] wexp; // Exponent of result
|
||||
output sumof; // X*Y+Z exponent out of bounds
|
||||
output sumuf; // X*Y+Z exponent underflows
|
||||
input logic [62:52] xexp; // Exponent of multiplicand x
|
||||
input logic [62:52] yexp; // Exponent of multiplicand y
|
||||
input logic [62:52] zexp; // Exponent of addend z
|
||||
input logic sumzero; // sum exactly equals zero
|
||||
input logic resultdenorm; // postnormalize rounded result
|
||||
input logic infinity; // generate infinity on overflow
|
||||
input logic [4:0] FmaFlagsM; // Result invalid
|
||||
input logic inf; // Some input is infinity
|
||||
input logic nanM; // Some input is NaN
|
||||
input logic [12:0] de0; // X is NaN NaN
|
||||
input logic xnanM; // X is NaN
|
||||
input logic ynanM; // Y is NaN
|
||||
input logic znanM; // Z is NaN
|
||||
input logic expplus1;
|
||||
input logic specialsel; // Select special result
|
||||
output logic [62:52] wexp; // Exponent of result
|
||||
output logic sumof; // X*Y+Z exponent out of bounds
|
||||
output logic sumuf; // X*Y+Z exponent underflows
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -102,6 +102,7 @@ module expgen2(xexp, yexp, zexp,
|
||||
// A mux selects the early result from other FPU blocks or the
|
||||
// normalized FMAC result. Special cases are also detected.
|
||||
|
||||
assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1;
|
||||
assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1};
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -11,17 +11,17 @@
|
||||
module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input xnanE; // X is NaN
|
||||
input ynanE; // Y is NaN
|
||||
input znanE; // Z is NaN
|
||||
input prodof; // X*Y overflows exponent
|
||||
output nanE; // Some source is NaN
|
||||
input logic xnanE; // X is NaN
|
||||
input logic ynanE; // Y is NaN
|
||||
input logic znanE; // Z is NaN
|
||||
input logic prodof; // X*Y overflows exponent
|
||||
output logic nanE; // Some source is NaN
|
||||
|
||||
// Internal nodes
|
||||
|
||||
output prodinfE; // X*Y larger than max possible
|
||||
output logic prodinfE; // X*Y larger than max possible
|
||||
|
||||
// If any input is NaN, propagate the NaN
|
||||
// If any input is NaN, propagate the NaN
|
||||
|
||||
assign nanE = xnanE || ynanE || znanE;
|
||||
|
||||
|
@ -13,27 +13,27 @@ module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof,
|
||||
inf, nanM, FmaFlagsM,sticky,prodinfM);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input xnanM; // X is NaN
|
||||
input ynanM; // Y is NaN
|
||||
input znanM; // Z is NaN
|
||||
input xsign; // Sign of z
|
||||
input ysign; // Sign of z
|
||||
input zsign; // Sign of z
|
||||
input sticky; // X is Inf
|
||||
input prodinfM;
|
||||
input xinfM; // X is Inf
|
||||
input yinfM; // Y is Inf
|
||||
input zinfM; // Z is Inf
|
||||
input sumof; // X*Y + z underflows exponent
|
||||
input sumuf; // X*Y + z underflows exponent
|
||||
input xzeroM; // x = 0
|
||||
input yzeroM; // y = 0
|
||||
input zzeroM; // y = 0
|
||||
input killprodM;
|
||||
input [1:0] vbits; // R and S bits of result
|
||||
output inf; // Some source is Inf
|
||||
output nanM; // Some source is NaN
|
||||
output [4:0] FmaFlagsM;
|
||||
input logic xnanM; // X is NaN
|
||||
input logic ynanM; // Y is NaN
|
||||
input logic znanM; // Z is NaN
|
||||
input logic xsign; // Sign of z
|
||||
input logic ysign; // Sign of z
|
||||
input logic zsign; // Sign of z
|
||||
input logic sticky; // X is Inf
|
||||
input logic prodinfM;
|
||||
input logic xinfM; // X is Inf
|
||||
input logic yinfM; // Y is Inf
|
||||
input logic zinfM; // Z is Inf
|
||||
input logic sumof; // X*Y + z underflows exponent
|
||||
input logic sumuf; // X*Y + z underflows exponent
|
||||
input logic xzeroM; // x = 0
|
||||
input logic yzeroM; // y = 0
|
||||
input logic zzeroM; // y = 0
|
||||
input logic killprodM;
|
||||
input logic [1:0] vbits; // R and S bits of result
|
||||
output logic inf; // Some source is Inf
|
||||
input logic nanM; // Some source is NaN
|
||||
output logic [4:0] FmaFlagsM;
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -55,8 +55,8 @@ logic suminf;
|
||||
assign FmaFlagsM[2] = suminf && ~inf;
|
||||
|
||||
// Set the underflow flag for the following cases:
|
||||
// 1) Any input is denormalized
|
||||
// 2) Output would be denormalized or smaller
|
||||
// 1) Any input is denormalized
|
||||
// 2) Output would be denormalized or smaller
|
||||
|
||||
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));
|
||||
|
||||
@ -70,7 +70,7 @@ logic suminf;
|
||||
// Set invalid flag for following cases:
|
||||
// 1) Inf - Inf
|
||||
// 2) 0 * Inf
|
||||
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
|
||||
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
|
||||
|
||||
assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
|
||||
xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above
|
||||
|
@ -35,37 +35,37 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
xinfE, yinfE, zinfE, nanE, prodinfE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [63:0] ReadData1E; // input 1
|
||||
input [63:0] ReadData2E; // input 2
|
||||
input [63:0] ReadData3E; // input 3
|
||||
input [2:0] FrmE; // Rounding mode
|
||||
output [12:0] aligncntE; // status flags
|
||||
output [105:0] rE; // one result of partial product sum
|
||||
output [105:0] sE; // other result of partial products
|
||||
output [163:0] tE; // output of alignment shifter
|
||||
output [12:0] aeE; // multiplier expoent
|
||||
output bsE; // sticky bit of addend
|
||||
output killprodE; // ReadData3E >> product
|
||||
output xzeroE;
|
||||
output yzeroE;
|
||||
output zzeroE;
|
||||
output xdenormE;
|
||||
output ydenormE;
|
||||
output zdenormE;
|
||||
output xinfE;
|
||||
output yinfE;
|
||||
output zinfE;
|
||||
output xnanE;
|
||||
output ynanE;
|
||||
output znanE;
|
||||
output nanE;
|
||||
output prodinfE;
|
||||
output [8:0] sumshiftE;
|
||||
output sumshiftzeroE;
|
||||
input logic [63:0] ReadData1E; // input 1
|
||||
input logic [63:0] ReadData2E; // input 2
|
||||
input logic [63:0] ReadData3E; // input 3
|
||||
input logic [2:0] FrmE; // Rounding mode
|
||||
output logic [12:0] aligncntE; // status flags
|
||||
output logic [105:0] rE; // one result of partial product sum
|
||||
output logic [105:0] sE; // other result of partial products
|
||||
output logic [163:0] tE; // output logic of alignment shifter
|
||||
output logic [12:0] aeE; // multiplier expoent
|
||||
output logic bsE; // sticky bit of addend
|
||||
output logic killprodE; // ReadData3E >> product
|
||||
output logic xzeroE;
|
||||
output logic yzeroE;
|
||||
output logic zzeroE;
|
||||
output logic xdenormE;
|
||||
output logic ydenormE;
|
||||
output logic zdenormE;
|
||||
output logic xinfE;
|
||||
output logic yinfE;
|
||||
output logic zinfE;
|
||||
output logic xnanE;
|
||||
output logic ynanE;
|
||||
output logic znanE;
|
||||
output logic nanE;
|
||||
output logic prodinfE;
|
||||
output logic [8:0] sumshiftE;
|
||||
output logic sumshiftzeroE;
|
||||
|
||||
// Internal nodes
|
||||
|
||||
// output [12:0] aligncntE; // shift count for alignment
|
||||
// output logic [12:0] aligncntE; // shift count for alignment
|
||||
|
||||
|
||||
logic prodof; // ReadData1E*ReadData2E out of range
|
||||
@ -95,7 +95,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
special special(.*);
|
||||
|
||||
|
||||
// Instantiate control output
|
||||
// Instantiate control output logic
|
||||
|
||||
flag1 flag1(.*);
|
||||
|
||||
|
@ -15,13 +15,13 @@
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to ReadData1M or ReadData3M inputs
|
||||
// bypass Handles bypass of result to ReadData1M or ReadData3M inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals
|
||||
// the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
@ -39,38 +39,38 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
|
||||
);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [63:0] ReadData1M; // input 1
|
||||
input [63:0] ReadData2M; // input 2
|
||||
input [63:0] ReadData3M; // input 3
|
||||
input [2:0] FrmM; // Rounding mode
|
||||
input [12:0] aligncntM; // status flags
|
||||
input [105:0] rM; // one result of partial product sum
|
||||
input [105:0] sM; // other result of partial products
|
||||
input [163:0] tM; // output of alignment shifter
|
||||
input [8:0] normcntM; // shift count for normalizer
|
||||
input [12:0] aeM; // multiplier expoent
|
||||
input bsM; // sticky bit of addend
|
||||
input killprodM; // ReadData3M >> product
|
||||
input prodinfM;
|
||||
input xzeroM;
|
||||
input yzeroM;
|
||||
input zzeroM;
|
||||
input xdenormM;
|
||||
input ydenormM;
|
||||
input zdenormM;
|
||||
input xinfM;
|
||||
input yinfM;
|
||||
input zinfM;
|
||||
input xnanM;
|
||||
input ynanM;
|
||||
input znanM;
|
||||
input nanM;
|
||||
input [8:0] sumshiftM;
|
||||
input sumshiftzeroM;
|
||||
input logic [63:0] ReadData1M; // input logic 1
|
||||
input logic [63:0] ReadData2M; // input logic 2
|
||||
input logic [63:0] ReadData3M; // input logic 3
|
||||
input logic [2:0] FrmM; // Rounding mode
|
||||
input logic [12:0] aligncntM; // status flags
|
||||
input logic [105:0] rM; // one result of partial product sum
|
||||
input logic [105:0] sM; // other result of partial products
|
||||
input logic [163:0] tM; // output of alignment shifter
|
||||
input logic [8:0] normcntM; // shift count for normalizer
|
||||
input logic [12:0] aeM; // multiplier expoent
|
||||
input logic bsM; // sticky bit of addend
|
||||
input logic killprodM; // ReadData3M >> product
|
||||
input logic prodinfM;
|
||||
input logic xzeroM;
|
||||
input logic yzeroM;
|
||||
input logic zzeroM;
|
||||
input logic xdenormM;
|
||||
input logic ydenormM;
|
||||
input logic zdenormM;
|
||||
input logic xinfM;
|
||||
input logic yinfM;
|
||||
input logic zinfM;
|
||||
input logic xnanM;
|
||||
input logic ynanM;
|
||||
input logic znanM;
|
||||
input logic nanM;
|
||||
input logic [8:0] sumshiftM;
|
||||
input logic sumshiftzeroM;
|
||||
|
||||
|
||||
input [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
|
||||
output [4:0] FmaFlagsM; // status flags
|
||||
output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
|
||||
output logic [4:0] FmaFlagsM; // status flags
|
||||
|
||||
|
||||
// Internal nodes
|
||||
|
@ -12,14 +12,13 @@
|
||||
module lza(sum, normcnt, sumzero);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [163:0] sum; // sum
|
||||
output [8:0] normcnt; // normalization shift count
|
||||
output sumzero; // sum = 0
|
||||
input logic [163:0] sum; // sum
|
||||
output logic [8:0] normcnt; // normalization shift count
|
||||
output logic sumzero; // sum = 0
|
||||
|
||||
// Internal nodes
|
||||
|
||||
reg [8:0] i; // loop index
|
||||
reg [8:0] normcnt; // normalization shift count
|
||||
|
||||
// A real LOP uses a fast carry chain to find only the first 0.
|
||||
// It is an example of a parallel prefix algorithm. For the sake
|
||||
@ -27,7 +26,7 @@ module lza(sum, normcnt, sumzero);
|
||||
// A real LOP would also operate on the sources of the adder, not
|
||||
// the result!
|
||||
|
||||
always @ ( sum)
|
||||
always_comb
|
||||
begin
|
||||
i = 0;
|
||||
while (~sum[163-i] && i <= 163) i = i+1; // search for leading one
|
||||
|
@ -2,31 +2,32 @@
|
||||
module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [51:0] xman; // Fraction of multiplicand x
|
||||
input [51:0] yman; // Fraction of multiplicand y
|
||||
input xdenormE; // is x denormalized
|
||||
input ydenormE; // is y denormalized
|
||||
input xzeroE; // Z is denorm
|
||||
input yzeroE; // Z is denorm
|
||||
output [105:0] rE; // partial product 1
|
||||
output [105:0] sE; // partial product 2
|
||||
input logic [51:0] xman; // Fraction of multiplicand x
|
||||
input logic [51:0] yman; // Fraction of multiplicand y
|
||||
input logic xdenormE; // is x denormalized
|
||||
input logic ydenormE; // is y denormalized
|
||||
input logic xzeroE; // Z is denorm
|
||||
input logic yzeroE; // Z is denorm
|
||||
output logic [105:0] rE; // partial product 1
|
||||
output logic [105:0] sE; // partial product 2
|
||||
|
||||
wire [54:0] yExt; //y with appended 0 and assumed 1
|
||||
wire [53:0] xExt; //y with assumed 1
|
||||
wire [26:0][1:0] add1;
|
||||
wire [26:0][54:0] pp;
|
||||
wire [26:0] e;
|
||||
logic [17:0][105:0] lv1add;
|
||||
logic [11:0][105:0] lv2add;
|
||||
logic [7:0][105:0] lv3add;
|
||||
logic [3:0][105:0] lv4add;
|
||||
logic [21:0][106:0] carryTmp;
|
||||
wire [26:0][105:0] acc;
|
||||
logic [106:0] tmpsE;
|
||||
logic [17:0][106:0] lv1add;
|
||||
logic [11:0][106:0] lv2add;
|
||||
logic [7:0][106:0] lv3add;
|
||||
logic [3:0][106:0] lv4add;
|
||||
logic [21:0][107:0] carryTmp;
|
||||
wire [26:0][106:0] acc;
|
||||
// wire [105:0] acc
|
||||
genvar i;
|
||||
|
||||
assign xExt = {2'b0,~(xdenormE|xzeroE),xman};
|
||||
assign yExt = {2'b0,~(ydenormE|yzeroE),yman, 1'b0};
|
||||
assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
|
||||
assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
|
||||
|
||||
generate
|
||||
for(i=0; i<27; i=i+1) begin
|
||||
@ -35,69 +36,70 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
|
||||
endgenerate
|
||||
|
||||
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
|
||||
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]};
|
||||
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
|
||||
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
|
||||
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
|
||||
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
|
||||
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
|
||||
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
|
||||
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
|
||||
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
|
||||
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
|
||||
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
|
||||
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
|
||||
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
|
||||
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
|
||||
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
|
||||
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
|
||||
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
|
||||
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
|
||||
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
|
||||
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
|
||||
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
|
||||
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
|
||||
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
|
||||
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
|
||||
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
|
||||
assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
|
||||
assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
|
||||
assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
|
||||
assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
|
||||
assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
|
||||
assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
|
||||
assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
|
||||
assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
|
||||
assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
|
||||
assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
|
||||
assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
|
||||
assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
|
||||
assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
|
||||
assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
|
||||
assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
|
||||
assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
|
||||
assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
|
||||
assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
|
||||
assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
|
||||
assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
|
||||
assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
|
||||
assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
|
||||
assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
|
||||
assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
|
||||
assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
|
||||
assign acc[26] = {pp[26],add1[25], 50'b0};
|
||||
|
||||
//*** resize adders
|
||||
generate
|
||||
for(i=0; i<9; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
|
||||
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
|
||||
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
|
||||
add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
|
||||
.carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
|
||||
assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<6; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
|
||||
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
|
||||
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
|
||||
add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
|
||||
.carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
|
||||
assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<4; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
|
||||
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
|
||||
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
|
||||
add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
|
||||
.carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
|
||||
assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
generate
|
||||
for(i=0; i<2; i=i+1) begin
|
||||
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
|
||||
add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
|
||||
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
|
||||
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
|
||||
assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
|
||||
.carry(carryTmp[21]), .sum(sE));
|
||||
add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
|
||||
.carry(carryTmp[21]), .sum(tmpsE));
|
||||
assign sE = tmpsE[105:0];
|
||||
assign rE = {carryTmp[21][104:0], 1'b0};
|
||||
// assign rE = 0;
|
||||
// assign sE = acc[0] +
|
||||
@ -131,3 +133,4 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
|
||||
// assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
|
||||
// assign rE = 0;
|
||||
endmodule
|
||||
|
||||
|
@ -17,35 +17,31 @@
|
||||
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
|
||||
xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
input [163:0] sum; // sum
|
||||
input [62:52] zexp; // sum
|
||||
input [8:0] normcnt; // normalization shift count
|
||||
input [12:0] aeM; // normalization shift count
|
||||
input [12:0] aligncntM; // normalization shift count
|
||||
input [8:0] sumshiftM; // normalization shift count
|
||||
input sumshiftzeroM;
|
||||
input sumzero; // sum is zero
|
||||
input bsM; // sticky bit for addend
|
||||
input xdenormM; // Input Z is denormalized
|
||||
input ydenormM; // Input Z is denormalized
|
||||
input zdenormM; // Input Z is denormalized
|
||||
input xzeroM;
|
||||
input yzeroM;
|
||||
input zzeroM;
|
||||
output sticky; //sticky bit
|
||||
output [12:0] de0;
|
||||
output resultdenorm; // Input Z is denormalized
|
||||
output [53:0] v; // normalized sum, R, S bits
|
||||
input logic [163:0] sum; // sum
|
||||
input logic [62:52] zexp; // sum
|
||||
input logic [8:0] normcnt; // normalization shift count
|
||||
input logic [12:0] aeM; // normalization shift count
|
||||
input logic [12:0] aligncntM; // normalization shift count
|
||||
input logic [8:0] sumshiftM; // normalization shift count
|
||||
input logic sumshiftzeroM;
|
||||
input logic sumzero; // sum is zero
|
||||
input logic bsM; // sticky bit for addend
|
||||
input logic xdenormM; // Input Z is denormalized
|
||||
input logic ydenormM; // Input Z is denormalized
|
||||
input logic zdenormM; // Input Z is denormalized
|
||||
input logic xzeroM;
|
||||
input logic yzeroM;
|
||||
input logic zzeroM;
|
||||
output logic sticky; //sticky bit
|
||||
output logic [12:0] de0;
|
||||
output logic resultdenorm; // Input Z is denormalized
|
||||
output logic [53:0] v; // normalized sum, R, S bits
|
||||
|
||||
// Internal nodes
|
||||
|
||||
reg [53:0] v; // normalized sum, R, S bits
|
||||
logic resultdenorm; // Input Z is denormalized
|
||||
logic [12:0] de0;
|
||||
logic [163:0] sumshifted; // shifted sum
|
||||
logic [163:0] sumshifted; // shifted sum
|
||||
logic [9:0] sumshifttmp;
|
||||
logic [163:0] sumshiftedtmp; // shifted sum
|
||||
logic sticky;
|
||||
logic isShiftLeft1;
|
||||
logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
|
||||
@ -60,28 +56,28 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
// The sticky bit calculation is actually built into the shifter and
|
||||
// does not require a true subtraction shown in the model.
|
||||
|
||||
assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
|
||||
assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
|
||||
always @(sum or sumshiftM or aeM or aligncntM or normcnt or bsM or isShiftLeft1 or zexp or zdenormM)
|
||||
assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
|
||||
// assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
|
||||
always_comb
|
||||
begin
|
||||
// d = aligncntM
|
||||
// l = normcnt
|
||||
// p = 53
|
||||
// ea + eb = aeM
|
||||
// set d<=2 to d<=0
|
||||
if ($signed(aligncntM)<=$signed(2)) begin //d<=2
|
||||
if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
|
||||
// product anchored or cancellation
|
||||
if ($signed(aeM-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
|
||||
if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
|
||||
//normal result
|
||||
de0 = xzeroM|yzeroM ? zexp : aeM-normcnt+xdenormM+ydenormM+57;
|
||||
de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
|
||||
resultdenorm = |sum & ~|de0 | de0[12];
|
||||
// if z is zero then there was a 56 bit shift of the product
|
||||
sumshifted = resultdenorm ? sum << sumshiftM-zzeroM+isShiftLeft1 : sum << normcnt; // p+2+l
|
||||
sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
//de0 = aeM-normcnt+2-1023;
|
||||
end else begin
|
||||
sumshifted = sum << (1080+aeM);
|
||||
sumshifted = sum << (13'd1080+aeM);
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
resultdenorm = 1;
|
||||
@ -100,29 +96,29 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
// the book says exp = zexp + {-1,0,1}
|
||||
if(sumshiftzeroM) begin
|
||||
v = sum[162:109];
|
||||
sticky = sum[108:0] | bsM;
|
||||
de0 = zexp;
|
||||
sticky = (|sum[108:0]) | bsM;
|
||||
de0 = {2'b0,zexp};
|
||||
end else if(sumshifted[163] & ~sumshifttmp[9])begin
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
de0 = zexp +2;
|
||||
de0 = {2'b0,zexp} +13'd2;
|
||||
end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
|
||||
v = sumshifted[161:108];
|
||||
sticky = (|sumshifted[107:0]) | bsM;
|
||||
de0 = zexp+1;
|
||||
de0 = {2'b0,zexp}+13'd1;
|
||||
end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp+zdenormM;
|
||||
de0 = {2'b0,zexp}+{12'b0,zdenormM};
|
||||
end else if(sumshifted[160]& ~zdenormM) begin
|
||||
de0 = zexp-1;
|
||||
de0 = {2'b0,zexp}-13'b1;
|
||||
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
|
||||
sticky = (|sumshifted[105:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
end else if(sumshifted[159]& ~zdenormM) begin
|
||||
//v = sumshifted[158:105];
|
||||
de0 = zexp-2;
|
||||
de0 = {2'b0,zexp}-13'd2;
|
||||
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
|
||||
sticky = (|sumshifted[104:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
@ -130,7 +126,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp;
|
||||
de0 = {{2{zexp[62]}},zexp};
|
||||
end else begin
|
||||
de0 = 0;
|
||||
sumshifted = sum << sumshiftM-1; // p+2+l
|
||||
@ -148,3 +144,4 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay.
|
||||
// This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay.
|
||||
//
|
||||
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
|
||||
// muxed in to form the actual result for register file writeback. This
|
||||
@ -19,23 +19,23 @@ module round(v, sticky, FrmM, wsign,
|
||||
wman, infinity, specialsel,expplus1);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [53:0] v; // normalized sum, R, S bits
|
||||
input sticky; //sticky bit
|
||||
input [2:0] FrmM;
|
||||
input wsign; // Sign of result
|
||||
input [4:0] FmaFlagsM;
|
||||
input inf; // Some input is infinity
|
||||
input nanM; // Some input is NaN
|
||||
input xnanM; // X is NaN
|
||||
input ynanM; // Y is NaN
|
||||
input znanM; // Z is NaN
|
||||
input [51:0] xman; // Input X
|
||||
input [51:0] yman; // Input Y
|
||||
input [51:0] zman; // Input Z
|
||||
output [51:0] wman; // rounded result of FMAC
|
||||
output infinity; // Generate infinity on overflow
|
||||
output specialsel; // Select special result
|
||||
output expplus1;
|
||||
input logic [53:0] v; // normalized sum, R, S bits
|
||||
input logic sticky; //sticky bit
|
||||
input logic [2:0] FrmM;
|
||||
input logic wsign; // Sign of result
|
||||
input logic [4:0] FmaFlagsM;
|
||||
input logic inf; // Some input logic is infinity
|
||||
input logic nanM; // Some input logic is NaN
|
||||
input logic xnanM; // X is NaN
|
||||
input logic ynanM; // Y is NaN
|
||||
input logic znanM; // Z is NaN
|
||||
input logic [51:0] xman; // input logic X
|
||||
input logic [51:0] yman; // input logic Y
|
||||
input logic [51:0] zman; // input logic Z
|
||||
output logic [51:0] wman; // rounded result of FMAC
|
||||
output logic infinity; // Generate infinity on overflow
|
||||
output logic specialsel; // Select special result
|
||||
output logic expplus1;
|
||||
|
||||
// Internal nodes
|
||||
|
||||
@ -56,7 +56,7 @@ module round(v, sticky, FrmM, wsign,
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - plus1 if v[2] = 1
|
||||
// 101/110/111 - plus1
|
||||
always @ (FrmM, v, wsign, sticky) begin
|
||||
always_comb begin
|
||||
case (FrmM)
|
||||
3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
|
||||
3'b001: plus1 = 0;//round to zero
|
||||
@ -85,7 +85,7 @@ module round(v, sticky, FrmM, wsign,
|
||||
// The special result mux is a 4:1 mux that should not appear in the
|
||||
// critical path of the machine. It is not priority encoded, despite
|
||||
// the code below suggesting otherwise. Also, several of the identical data
|
||||
// inputs to the wide muxes can be combined at the expense of more
|
||||
// inputs to the wide muxes can be combined at the expense of more
|
||||
// complicated non-critical control in the circuit implementation.
|
||||
|
||||
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
|
||||
@ -102,15 +102,15 @@ module round(v, sticky, FrmM, wsign,
|
||||
assign infinityres = infinity ? 52'b0 : {52{1'b1}};
|
||||
|
||||
// Invalid operations produce a quiet NaN. The result should
|
||||
// propagate an input if the input is NaN. Since we assume all
|
||||
// NaN inputs are already quiet, we don't have to force them quiet.
|
||||
// propagate an input if the input is NaN. Since we assume all
|
||||
// NaN inputs are already quiet, we don't have to force them quiet.
|
||||
|
||||
// assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original
|
||||
|
||||
// IEEE 754-2008 section 6.2.3 states:
|
||||
// "If two or more inputs are NaN, then the payload of the resulting NaN should be
|
||||
// identical to the payload of one of the input NaNs if representable in the destination
|
||||
// format. This standard does not specify which of the input NaNs will provide the payload."
|
||||
// "If two or more inputs are NaN, then the payload of the resulting NaN should be
|
||||
// identical to the payload of one of the input NaNs if representable in the destination
|
||||
// format. This standard does not specify which of the input NaNs will provide the payload."
|
||||
assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
|
||||
|
||||
// Select result with 4:1 mux
|
||||
|
@ -14,30 +14,28 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
|
||||
sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
|
||||
////////////////////////////////////////////////////////////////////////////I
|
||||
|
||||
input xsign; // Sign of X
|
||||
input ysign; // Sign of Y
|
||||
input zsign; // Sign of Z
|
||||
input isAdd;
|
||||
input negsum0; // Sum in +O mode is negative
|
||||
input negsum1; // Sum in +1 mode is negative
|
||||
input bsM; // sticky bit from addend
|
||||
input [2:0] FrmM; // Round toward minus infinity
|
||||
input [4:0] FmaFlagsM; // Round toward minus infinity
|
||||
input sumzero; // Sum = O
|
||||
input zinfM; // Y = Inf
|
||||
input inf; // Some input = Inf
|
||||
output wsign; // Sign of W
|
||||
output invz; // Invert addend into adder
|
||||
output negsum; // Negate result of adder
|
||||
output selsum1; // Select +1 mode from compound adder
|
||||
input logic xsign; // Sign of X
|
||||
input logic ysign; // Sign of Y
|
||||
input logic zsign; // Sign of Z
|
||||
input logic isAdd;
|
||||
input logic negsum0; // Sum in +O mode is negative
|
||||
input logic negsum1; // Sum in +1 mode is negative
|
||||
input logic bsM; // sticky bit from addend
|
||||
input logic [2:0] FrmM; // Round toward minus infinity
|
||||
input logic [4:0] FmaFlagsM; // Round toward minus infinity
|
||||
input logic sumzero; // Sum = O
|
||||
input logic zinfM; // Y = Inf
|
||||
input logic inf; // Some input logic = Inf
|
||||
output logic wsign; // Sign of W
|
||||
output logic invz; // Invert addend into adder
|
||||
output logic negsum; // Negate result of adder
|
||||
output logic selsum1; // Select +1 mode from compound adder
|
||||
|
||||
// Internal nodes
|
||||
|
||||
wire zerosign; // sign if result= 0
|
||||
wire sumneg; // sign if result= 0
|
||||
wire infsign; // sign if result= Inf
|
||||
reg negsum; // negate result of adder
|
||||
reg selsum1; // select +1 mode from compound adder
|
||||
logic tmp;
|
||||
|
||||
// Compute sign of product
|
||||
@ -59,7 +57,7 @@ logic tmp;
|
||||
assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);
|
||||
//always @(invz or negsum0 or negsum1 or bsM or ps)
|
||||
// begin
|
||||
// if (~invz) begin // both inputs have same sign
|
||||
// if (~invz) begin // both inputs have same sign
|
||||
// negsum = 0;
|
||||
// selsum1 = 0;
|
||||
// end else if (bsM) begin // sticky bit set on addend
|
||||
@ -84,7 +82,7 @@ logic tmp;
|
||||
// Sign calculation is not in the critical path so the cases
|
||||
// can be tolerated.
|
||||
// IEEE 754-2008 section 6.3 states
|
||||
// "When ether an input or result is NaN, this standard does not interpret the sign of a NaN."
|
||||
// "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
|
||||
// also pertaining to negZero it states:
|
||||
// "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
|
||||
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
|
||||
|
@ -14,21 +14,21 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
|
||||
xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input [63:0] ReadData1E; // Input ReadData1E
|
||||
input [63:0] ReadData2E; // Input ReadData2E
|
||||
input [63:0] ReadData3E; // Input ReadData3E
|
||||
output xzeroE; // Input ReadData1E = 0
|
||||
output yzeroE; // Input ReadData2E = 0
|
||||
output zzeroE; // Input ReadData3E = 0
|
||||
output xnanE; // ReadData1E is NaN
|
||||
output ynanE; // ReadData2E is NaN
|
||||
output znanE; // ReadData3E is NaN
|
||||
output xdenormE; // ReadData1E is denormalized
|
||||
output ydenormE; // ReadData2E is denormalized
|
||||
output zdenormE; // ReadData3E is denormalized
|
||||
output xinfE; // ReadData1E is infinity
|
||||
output yinfE; // ReadData2E is infinity
|
||||
output zinfE; // ReadData3E is infinity
|
||||
input logic [63:0] ReadData1E; // Input ReadData1E
|
||||
input logic [63:0] ReadData2E; // Input ReadData2E
|
||||
input logic [63:0] ReadData3E; // Input ReadData3E
|
||||
output logic xzeroE; // Input ReadData1E = 0
|
||||
output logic yzeroE; // Input ReadData2E = 0
|
||||
output logic zzeroE; // Input ReadData3E = 0
|
||||
output logic xnanE; // ReadData1E is NaN
|
||||
output logic ynanE; // ReadData2E is NaN
|
||||
output logic znanE; // ReadData3E is NaN
|
||||
output logic xdenormE; // ReadData1E is denormalized
|
||||
output logic ydenormE; // ReadData2E is denormalized
|
||||
output logic zdenormE; // ReadData3E is denormalized
|
||||
output logic xinfE; // ReadData1E is infinity
|
||||
output logic yinfE; // ReadData2E is infinity
|
||||
output logic zinfE; // ReadData3E is infinity
|
||||
|
||||
// In the actual circuit design, the gates looking at bits
|
||||
// 51:0 and at bits 62:52 should be shared among the various detectors.
|
||||
@ -60,7 +60,7 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
|
||||
// assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE;
|
||||
// assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE;
|
||||
// assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE;
|
||||
// KATHERINE - removed denorm to prevent outputing zero when computing with a denormalized number
|
||||
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
|
||||
assign xzeroE = ~(|ReadData1E[62:0]);
|
||||
assign yzeroE = ~(|ReadData2E[62:0]);
|
||||
assign zzeroE = ~(|ReadData3E[62:0]);
|
||||
|
@ -1 +1 @@
|
||||
testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat
|
||||
testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat
|
||||
|
@ -34,6 +34,7 @@ module add(rM, sM, tM, sum,
|
||||
wire [164:0] sum0; // sum of compound adder +0 mode
|
||||
wire [164:0] sum1; // sum of compound adder +1 mode
|
||||
wire [163:0] prodshifted; // sum of compound adder +1 mode
|
||||
wire [164:0] tmp; // sum of compound adder +1 mode
|
||||
|
||||
// Invert addend if z's sign is different from the product's sign
|
||||
|
||||
@ -44,12 +45,13 @@ module add(rM, sM, tM, sum,
|
||||
assign r2 = killprodM ? 106'b0 : rM;
|
||||
assign s2 = killprodM ? 106'b0 : sM;
|
||||
|
||||
//replace this with a more structural cpa that synthisises better
|
||||
//***replace this with a more structural cpa that synthesizes better
|
||||
// Compound adder
|
||||
// Consists of 3:2 CSA followed by long compound CPA
|
||||
// assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
|
||||
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0};
|
||||
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0}; // +1 from invert of z above
|
||||
//assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
|
||||
//assign tmp = ({{57{r2[105]}},r2, 2'b0} + {{57{s2[105]}},s2, 2'b0});
|
||||
assign sum0 = t2 + 164'b0 + {57'b0, r2+s2, 2'b0};
|
||||
assign sum1 = t2 + 164'b1 + {57'b0, r2+s2, 2'b0}; // +1 from invert of z above
|
||||
|
||||
// Check sign bits in +0/1 modes
|
||||
assign negsum0 = sum0[164];
|
||||
@ -60,3 +62,4 @@ module add(rM, sM, tM, sum,
|
||||
assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]);
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -88,15 +88,15 @@ module BLOCK2A ( PIN2, GIN1, GIN2, GOUT );
|
||||
assign GOUT = ~ (GIN2 | (PIN2 & GIN1));
|
||||
|
||||
endmodule
|
||||
|
||||
//***KEP all 0:63, 0:64 etc. changed - changed due to lint warning
|
||||
module PRESTAGE_64 ( A, B, CIN, POUT, GOUT );
|
||||
|
||||
input [0:63] A;
|
||||
input [0:63] B;
|
||||
input [63:0] A;
|
||||
input [63:0] B;
|
||||
input CIN;
|
||||
|
||||
output [0:63] POUT;
|
||||
output [0:64] GOUT;
|
||||
output [63:0] POUT;
|
||||
output [64:0] GOUT;
|
||||
|
||||
BLOCK0 U10 (A[0] , B[0] , POUT[0] , GOUT[1] );
|
||||
BLOCK0 U11 (A[1] , B[1] , POUT[1] , GOUT[2] );
|
||||
@ -169,11 +169,11 @@ endmodule // PRESTAGE_64
|
||||
|
||||
module DBLC_0_64 ( PIN, GIN, POUT, GOUT );
|
||||
|
||||
input [0:63] PIN;
|
||||
input [0:64] GIN;
|
||||
input [63:0] PIN;
|
||||
input [64:0] GIN;
|
||||
|
||||
output [0:62] POUT;
|
||||
output [0:64] GOUT;
|
||||
output [62:0] POUT;
|
||||
output [64:0] GOUT;
|
||||
|
||||
INVBLOCK U10 (GIN[0] , GOUT[0] );
|
||||
BLOCK1A U21 (PIN[0] , GIN[0] , GIN[1] , GOUT[1] );
|
||||
@ -246,11 +246,11 @@ endmodule // DBLC_0_64
|
||||
|
||||
module DBLC_1_64 ( PIN, GIN, POUT, GOUT );
|
||||
|
||||
input [0:62] PIN;
|
||||
input [0:64] GIN;
|
||||
input [62:0] PIN;
|
||||
input [64:0] GIN;
|
||||
|
||||
output [0:60] POUT;
|
||||
output [0:64] GOUT;
|
||||
output [60:0] POUT;
|
||||
output [64:0] GOUT;
|
||||
|
||||
INVBLOCK U10 (GIN[0] , GOUT[0] );
|
||||
INVBLOCK U11 (GIN[1] , GOUT[1] );
|
||||
@ -323,11 +323,11 @@ endmodule // DBLC_1_64
|
||||
|
||||
module DBLC_2_64 ( PIN, GIN, POUT, GOUT );
|
||||
|
||||
input [0:60] PIN;
|
||||
input [0:64] GIN;
|
||||
input [60:0] PIN;
|
||||
input [64:0] GIN;
|
||||
|
||||
output [0:56] POUT;
|
||||
output [0:64] GOUT;
|
||||
output [56:0] POUT;
|
||||
output [64:0] GOUT;
|
||||
|
||||
INVBLOCK U10 (GIN[0] , GOUT[0] );
|
||||
INVBLOCK U11 (GIN[1] , GOUT[1] );
|
||||
@ -400,11 +400,11 @@ endmodule // DBLC_2_64
|
||||
|
||||
module DBLC_3_64 ( PIN, GIN, POUT, GOUT );
|
||||
|
||||
input [0:56] PIN;
|
||||
input [0:64] GIN;
|
||||
input [56:0] PIN;
|
||||
input [64:0] GIN;
|
||||
|
||||
output [0:48] POUT;
|
||||
output [0:64] GOUT;
|
||||
output [48:0] POUT;
|
||||
output [64:0] GOUT;
|
||||
|
||||
INVBLOCK U10 (GIN[0] , GOUT[0] );
|
||||
INVBLOCK U11 (GIN[1] , GOUT[1] );
|
||||
@ -477,11 +477,11 @@ endmodule // DBLC_3_64
|
||||
|
||||
module DBLC_4_64 ( PIN, GIN, POUT, GOUT );
|
||||
|
||||
input [0:48] PIN;
|
||||
input [0:64] GIN;
|
||||
input [48:0] PIN;
|
||||
input [64:0] GIN;
|
||||
|
||||
output [0:32] POUT;
|
||||
output [0:64] GOUT;
|
||||
output [32:0] POUT;
|
||||
output [64:0] GOUT;
|
||||
|
||||
INVBLOCK U10 (GIN[0] , GOUT[0] );
|
||||
INVBLOCK U11 (GIN[1] , GOUT[1] );
|
||||
@ -554,11 +554,11 @@ endmodule // DBLC_4_64
|
||||
|
||||
module DBLC_5_64 ( PIN, GIN, POUT, GOUT );
|
||||
|
||||
input [0:32] PIN;
|
||||
input [0:64] GIN;
|
||||
input [32:0] PIN;
|
||||
input [64:0] GIN;
|
||||
|
||||
output [0:0] POUT;
|
||||
output [0:64] GOUT;
|
||||
output [64:0] GOUT;
|
||||
|
||||
INVBLOCK U10 (GIN[0] , GOUT[0] );
|
||||
INVBLOCK U11 (GIN[1] , GOUT[1] );
|
||||
@ -631,12 +631,12 @@ endmodule // DBLC_5_64
|
||||
|
||||
module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT );
|
||||
|
||||
input [0:63] A;
|
||||
input [0:63] B;
|
||||
input [63:0] A;
|
||||
input [63:0] B;
|
||||
input PBIT;
|
||||
input [0:64] CARRY;
|
||||
input [64:0] CARRY;
|
||||
|
||||
output [0:63] SUM;
|
||||
output [63:0] SUM;
|
||||
output COUT;
|
||||
|
||||
XXOR1 U20 (A[0] , B[0] , CARRY[0] , SUM[0] );
|
||||
@ -710,22 +710,22 @@ endmodule // XORSTAGE_64
|
||||
|
||||
module DBLCTREE_64 ( PIN, GIN, GOUT, POUT );
|
||||
|
||||
input [0:63] PIN;
|
||||
input [0:64] GIN;
|
||||
input [63:0] PIN;
|
||||
input [64:0] GIN;
|
||||
|
||||
output [0:64] GOUT;
|
||||
output [64:0] GOUT;
|
||||
output [0:0] POUT;
|
||||
|
||||
wire [0:62] INTPROP_0;
|
||||
wire [0:64] INTGEN_0;
|
||||
wire [0:60] INTPROP_1;
|
||||
wire [0:64] INTGEN_1;
|
||||
wire [0:56] INTPROP_2;
|
||||
wire [0:64] INTGEN_2;
|
||||
wire [0:48] INTPROP_3;
|
||||
wire [0:64] INTGEN_3;
|
||||
wire [0:32] INTPROP_4;
|
||||
wire [0:64] INTGEN_4;
|
||||
wire [62:0] INTPROP_0;
|
||||
wire [64:0] INTGEN_0;
|
||||
wire [60:0] INTPROP_1;
|
||||
wire [64:0] INTGEN_1;
|
||||
wire [56:0] INTPROP_2;
|
||||
wire [64:0] INTGEN_2;
|
||||
wire [48:0] INTPROP_3;
|
||||
wire [64:0] INTGEN_3;
|
||||
wire [32:0] INTPROP_4;
|
||||
wire [64:0] INTGEN_4;
|
||||
|
||||
DBLC_0_64 U_0 (.PIN(PIN) , .GIN(GIN) , .POUT(INTPROP_0) , .GOUT(INTGEN_0) );
|
||||
DBLC_1_64 U_1 (.PIN(INTPROP_0) , .GIN(INTGEN_0) , .POUT(INTPROP_1) , .GOUT(INTGEN_1) );
|
||||
@ -739,20 +739,20 @@ endmodule // DBLCTREE_64
|
||||
|
||||
module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT );
|
||||
|
||||
input [0:63] OPA;
|
||||
input [0:63] OPB;
|
||||
input [63:0] OPA;
|
||||
input [63:0] OPB;
|
||||
input CIN;
|
||||
|
||||
output [0:63] SUM;
|
||||
output [63:0] SUM;
|
||||
output COUT;
|
||||
|
||||
wire [0:63] INTPROP;
|
||||
wire [0:64] INTGEN;
|
||||
wire [63:0] INTPROP;
|
||||
wire [64:0] INTGEN;
|
||||
wire [0:0] PBIT;
|
||||
wire [0:64] CARRY;
|
||||
wire [64:0] CARRY;
|
||||
|
||||
PRESTAGE_64 U1 (OPA , OPB , CIN , INTPROP , INTGEN );
|
||||
DBLCTREE_64 U2 (INTPROP , INTGEN , CARRY , PBIT );
|
||||
XORSTAGE_64 U3 (OPA[0:63] , OPB[0:63] , PBIT[0] , CARRY[0:64] , SUM , COUT );
|
||||
XORSTAGE_64 U3 (OPA[63:0] , OPB[63:0] , PBIT[0] , CARRY[64:0] , SUM , COUT );
|
||||
|
||||
endmodule
|
||||
|
@ -30,21 +30,10 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
|
||||
// Internal nodes
|
||||
|
||||
reg [215:0] shift; // aligned addend from shifter
|
||||
logic zexpsel; // sticky bit of product
|
||||
reg [7:0] i; // temp storage for finding sticky bit
|
||||
wire [52:0] z1; // Z plus 1
|
||||
wire [51:0] z2; // Z selected after handling rounds
|
||||
logic [12:0] tmp;
|
||||
|
||||
|
||||
|
||||
// Compute sign of aligncntE + 104 to check for shifting too far right
|
||||
|
||||
//assign align104 = aligncntE+104;
|
||||
|
||||
// Shift addend by alignment count. Generate sticky bits from
|
||||
// addend on right shifts. Handle special cases of shifting
|
||||
// by too much.
|
||||
//***change always @ to always_combs
|
||||
always_comb
|
||||
begin
|
||||
|
||||
@ -55,32 +44,34 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
|
||||
killprodE = xzeroE | yzeroE;
|
||||
// d = aligncntE
|
||||
// p = 53
|
||||
//***try reducing this hardware try getting onw shifter
|
||||
if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1
|
||||
//***try reducing this hardware to use one shifter
|
||||
if ($signed(aligncntE) <= $signed(-(13'd105))) begin //d<=-2p+1
|
||||
//product anchored case with saturated shift
|
||||
sumshiftE = 163; // 3p+4
|
||||
sumshiftzeroE = 0;
|
||||
shift = {1'b1,zman,163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if($signed(aligncntE) <= $signed(2)) begin // -2p+1<d<=2
|
||||
|
||||
end else if($signed(aligncntE) <= $signed(13'd2)) begin // -2p+1<d<=2
|
||||
// product anchored or cancellation
|
||||
sumshiftE = 57-aligncntE; // p + 2 - d
|
||||
tmp = 13'd57-aligncntE;
|
||||
sumshiftE = tmp[8:0]; // p + 2 - d
|
||||
sumshiftzeroE = 0;
|
||||
shift = {~zdenormE,zman,163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
//zexpsel = 0;
|
||||
end else if ($signed(aligncntE)<=$signed(55)) begin // 2 < d <= p+2
|
||||
|
||||
end else if ($signed(aligncntE)<=$signed(13'd55)) begin // 2 < d <= p+2
|
||||
// addend anchored case
|
||||
// used to be 56 \/ somthing doesn'tE seem right too many typos
|
||||
sumshiftE = 57-aligncntE;
|
||||
// used to be 56 \/ something doesn't seem right too many typos
|
||||
tmp = 13'd57-aligncntE;
|
||||
sumshiftE = tmp[8:0];
|
||||
sumshiftzeroE = 0;
|
||||
shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
//zexpsel = 1;
|
||||
|
||||
end else begin // d >= p+3
|
||||
// addend anchored case with saturated shift
|
||||
sumshiftE = 0;
|
||||
@ -89,15 +80,9 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
|
||||
tE = zzeroE ? 0 : {shift[215:52]};
|
||||
bsE = |(shift[51:0]);
|
||||
killprodE = 1;
|
||||
//ps = 1;
|
||||
//zexpsel = 1;
|
||||
|
||||
// use some behavioral code to find sticky bit. This is really
|
||||
// done by hardware in the shifter.
|
||||
//if (aligncntE < 0)
|
||||
// for (i=0; i<-aligncntE-52; i = i+1)
|
||||
// bsE = bsE || z2[i];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -31,6 +31,11 @@ module kogge_stone (h, c, p, g);
|
||||
|
||||
output [15:1] h;
|
||||
output [15:1] c;
|
||||
logic H_1_0,H_2_1,I_2_1,H_3_2,I_3_2,H_4_3,I_4_3,H_5_4,I_5_4,H_6_5,I_6_5,H_7_6,I_7_6,H_8_7,I_8_7,H_9_8,I_9_8,H_10_9
|
||||
,I_10_9,H_11_10,I_11_10,H_12_11,I_12_11,H_13_12,I_13_12,H_14_13,I_14_13,H_2_0,H_3_0,H_4_1,I_4_1,H_5_2,I_5_2,H_6_3
|
||||
,I_6_3,H_7_4,I_7_4,H_8_5,I_8_5,H_9_6,I_9_6,H_10_7,I_10_7,H_11_8,I_11_8,H_12_9,I_12_9,H_13_10,I_13_10,H_14_11,I_14_11
|
||||
,H_4_0,H_5_0,H_6_0,H_7_0,H_8_1,I_8_1,H_9_2,I_9_2,H_10_3,I_10_3,H_11_4,I_11_4,H_12_5,I_12_5,H_13_6,I_13_6,H_14_7
|
||||
,I_14_7,H_8_0,H_9_0,H_10_0,H_11_0,H_12_0,H_13_0,H_14_0;
|
||||
|
||||
// parallel-prefix, Kogge-Stone
|
||||
|
||||
|
@ -22,9 +22,6 @@ module booth(xExt, choose, add1, e, pp);
|
||||
3'b100 : pp = {negx, 1'b0}; // -2
|
||||
3'b101 : pp = {1'b1, negx}; // -1
|
||||
3'b110 : pp = {1'b1, negx}; // -1
|
||||
// *** <Thomas Fleming> I changed this to fix a lint error. '1 should
|
||||
// fill the signal with all ones.
|
||||
// 3'b111 : pp = 55'hfffffffffffffff;
|
||||
3'b111 : pp = '1; // -0
|
||||
endcase
|
||||
|
||||
|
@ -9,7 +9,7 @@ module cla12 (S, CO, X, Y);
|
||||
output [11:0] S;
|
||||
output CO;
|
||||
|
||||
wire [0:63] A,B,Q;
|
||||
wire [63:0] A,B,Q;//***KEP was 0:63 - changed due to lint warning
|
||||
wire LOGIC0;
|
||||
wire CIN;
|
||||
wire CO_64;
|
||||
@ -174,10 +174,11 @@ module cla_sub12 (S, X, Y);
|
||||
|
||||
output [11:0] S;
|
||||
|
||||
wire [0:63] A,B,Q,Bbar;
|
||||
wire [63:0] A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning
|
||||
wire CO;
|
||||
wire LOGIC0;
|
||||
wire VDD;
|
||||
logic CO_12;
|
||||
|
||||
assign Bbar = ~B;
|
||||
assign LOGIC0 = 0;
|
||||
|
@ -9,7 +9,7 @@ module cla52 (S, CO, X, Y);
|
||||
output [51:0] S;
|
||||
output CO;
|
||||
|
||||
wire [0:63] A,B,Q;
|
||||
wire [63:0] A,B,Q;//***KEP was 0:63 - changed due to lint warning
|
||||
wire LOGIC0;
|
||||
wire CIN;
|
||||
wire CO_64;
|
||||
@ -211,7 +211,7 @@ module cla_sub52 (S, X, Y);
|
||||
|
||||
output [51:0] S;
|
||||
|
||||
wire [0:63] A,B,Q,Bbar;
|
||||
wire [63:0] A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning
|
||||
wire LOGIC0;
|
||||
wire CIN;
|
||||
wire CO_52;
|
||||
|
@ -9,7 +9,7 @@ module cla64 (S, X, Y, Sub);
|
||||
input Sub;
|
||||
output [63:0] S;
|
||||
wire CO;
|
||||
wire [0:63] A,B,Q, Bbar;
|
||||
wire [63:0] A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning
|
||||
|
||||
DBLCADDER_64_64 U1 (A , Bbar , Sub , Q , CO );
|
||||
assign A[0] = X[0];
|
||||
@ -220,7 +220,7 @@ module cla_sub64 (S, X, Y);
|
||||
|
||||
wire CO;
|
||||
wire VDD = 1'b1;
|
||||
wire [0:63] A,B,Q, Bbar;
|
||||
wire [63:0] A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning
|
||||
|
||||
DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO );
|
||||
assign A[0] = X[0];
|
||||
|
@ -75,7 +75,7 @@ module sng4comp2(a, b, c, d, cin, cout, carry, sum);
|
||||
|
||||
input logic a;
|
||||
input logic b;
|
||||
input logic c;
|
||||
input logic c;
|
||||
input logic d;
|
||||
input logic cin;
|
||||
output logic cout;
|
||||
|
@ -40,7 +40,9 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
logic [127:0] constant, constant2;
|
||||
logic [63:0] q_const, qp_const, qm_const;
|
||||
logic [63:0] d2, n2;
|
||||
logic [11:0] d3;
|
||||
logic [11:0] d3;
|
||||
|
||||
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7, muxr_out;
|
||||
|
||||
// Check if exponent is odd for sqrt
|
||||
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
|
||||
|
@ -50,7 +50,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
|
||||
// if exponent is out of bounds
|
||||
|
||||
|
||||
assign aeE = xzeroE|yzeroE ? 0 : xexp + yexp -1023;
|
||||
assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;
|
||||
|
||||
assign prodof = (aeE > 2046 && ~aeE[12]);
|
||||
|
||||
@ -61,7 +61,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
|
||||
// is masked by the bypass mux and two 10 bit adder delays.
|
||||
// assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
|
||||
// assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
|
||||
assign aligncntE = zexp -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
|
||||
assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
|
||||
//assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
|
||||
//assign aligncntE = zexp - aeE;// KEP use all of aeE
|
||||
|
||||
@ -87,3 +87,4 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
|
||||
// rounding mode. NaNs are propagated or generated.
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -102,6 +102,7 @@ module expgen2(xexp, yexp, zexp,
|
||||
// A mux selects the early result from other FPU blocks or the
|
||||
// normalized FMAC result. Special cases are also detected.
|
||||
|
||||
assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1;
|
||||
assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1};
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -3,20 +3,23 @@ module fctrl (
|
||||
input logic [6:0] Funct7D,
|
||||
input logic [6:0] OpD,
|
||||
input logic [4:0] Rs2D,
|
||||
input logic [4:0] Rs1D,
|
||||
input logic [2:0] FrmW,
|
||||
output logic WriteEnD,
|
||||
input logic [2:0] Funct3D,
|
||||
input logic [2:0] FRM_REGW,
|
||||
output logic IllegalFPUInstrD,
|
||||
output logic FRegWriteD,
|
||||
output logic DivSqrtStartD,
|
||||
//output logic [2:0] regSelD,
|
||||
output logic [2:0] WriteSelD,
|
||||
output logic [2:0] FResultSelD,
|
||||
output logic [3:0] OpCtrlD,
|
||||
output logic FmtD,
|
||||
output logic [2:0] FrmD,
|
||||
output logic WriteIntD);
|
||||
|
||||
|
||||
|
||||
//precision is taken directly from instruction
|
||||
assign FmtD = Funct7D[0];
|
||||
// *** fix rounding for dynamic rounding
|
||||
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
|
||||
|
||||
//all subsequent logic is based on the table present
|
||||
//in Section 5 of Wally Architecture Specification
|
||||
@ -29,59 +32,75 @@ module fctrl (
|
||||
//in case of errors
|
||||
case(OpD)
|
||||
//fp instructions sans load
|
||||
7'b1010011 : begin isFP = 1'b1; isFPLD = 1'b0; end
|
||||
7'b1000011 : begin isFP = 1'b1; isFPLD = 1'b0; end
|
||||
7'b1000111 : begin isFP = 1'b1; isFPLD = 1'b0; end
|
||||
7'b1001011 : begin isFP = 1'b1; isFPLD = 1'b0; end
|
||||
7'b1001111 : begin isFP = 1'b1; isFPLD = 1'b0; end
|
||||
7'b0100111 : begin isFP = 1'b1; isFPLD = 1'b0; end
|
||||
//fp load
|
||||
7'b1010011 : begin isFP = 1'b1; isFPLD = 1'b1; end
|
||||
default : begin isFP = 1'b0; isFPLD = 1'b0; end
|
||||
7'b1010011 : isFP = 1'b1;
|
||||
7'b1000011 : isFP = 1'b1;
|
||||
7'b1000111 : isFP = 1'b1;
|
||||
7'b1001011 : isFP = 1'b1;
|
||||
7'b1001111 : isFP = 1'b1;
|
||||
7'b0100111 : isFP = 1'b1;
|
||||
7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111
|
||||
default : isFP = 1'b0;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign WriteEnD = isFP & ~isFPLD;
|
||||
|
||||
|
||||
//useful intermediary signals
|
||||
//
|
||||
//(mult only not supported in current datapath)
|
||||
//set third FMA operand to zero in this case
|
||||
//(or equivalent)
|
||||
logic isAddSub, isFMA, isMult, isDivSqrt, isCvt, isCmp, isFPSTR;
|
||||
|
||||
always_comb begin
|
||||
//checks all but FMA/store/load
|
||||
if(OpD == 7'b1010011) begin
|
||||
case(Funct7D)
|
||||
casez(Funct7D)
|
||||
//compare
|
||||
7'b10100?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b1; isFPSTR = 1'b0; end
|
||||
7'b10100?? : FResultSelD = 3'b001;
|
||||
//div/sqrt
|
||||
7'b0?011?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b1; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b0?011?? : FResultSelD = 3'b000;
|
||||
//add/sub
|
||||
7'b0000??? : begin isAddSub = 1'b1; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b0000??? : FResultSelD = 3'b100;
|
||||
//mult
|
||||
7'b00010?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b1; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b00010?? : FResultSelD = 3'b010;
|
||||
//convert (not precision)
|
||||
7'b110?0?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b1; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b110?0?? : FResultSelD = 3'b100;
|
||||
//convert (precision)
|
||||
7'b010000? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b1; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b010000? : FResultSelD = 3'b100;
|
||||
//Min/Max
|
||||
7'b00101?? : FResultSelD = 3'b001;
|
||||
//sign injection
|
||||
7'b00100?? : FResultSelD = 3'b011;
|
||||
//classify //only if funct3 = 001
|
||||
7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
|
||||
//output ReadData1
|
||||
else if (Funct7D[1] == 0) FResultSelD = 3'b111;
|
||||
//output SrcW
|
||||
7'b111100? : FResultSelD = 3'b110;
|
||||
default : FResultSelD = 3'bxxx;
|
||||
endcase
|
||||
end
|
||||
//FMA/store/load
|
||||
else begin
|
||||
case(OpD)
|
||||
//4 FMA instructions
|
||||
7'b1000011 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b1000111 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b1001011 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
7'b1001111 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end
|
||||
//store (load already found)
|
||||
7'b0100111 : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b1; end
|
||||
7'b1000011 : FResultSelD = 3'b010;
|
||||
7'b1000111 : FResultSelD = 3'b010;
|
||||
7'b1001011 : FResultSelD = 3'b010;
|
||||
7'b1001111 : FResultSelD = 3'b010;
|
||||
//store
|
||||
7'b0100111 : FResultSelD = 3'b111;
|
||||
//load
|
||||
7'b0000111 : FResultSelD = 3'b111;
|
||||
default : FResultSelD = 3'bxxx;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//register is chosen based on operation performed
|
||||
//----
|
||||
//write selection is chosen in the same way as
|
||||
@ -90,26 +109,21 @@ module fctrl (
|
||||
|
||||
// reg/write sel logic and assignment
|
||||
//
|
||||
// 3'b000 = add/sub/cvt
|
||||
// 3'b001 = sign
|
||||
// 3'b010 = fma
|
||||
// 3'b011 = cmp
|
||||
// 3'b100 = div/sqrt
|
||||
// 3'b000 = div/sqrt
|
||||
// 3'b001 = cmp
|
||||
// 3'b010 = fma/mult
|
||||
// 3'b011 = sgn inj
|
||||
// 3'b100 = add/sub/cnvt
|
||||
// 3'b101 = classify
|
||||
// 3'b110 = output SrcAW
|
||||
// 3'b111 = output ReadData1
|
||||
//
|
||||
//reg select
|
||||
|
||||
//this value is used enough to be shorthand
|
||||
logic isSign;
|
||||
assign isSign = ~Funct7D[6] & ~Funct7D[5] & Funct7D[4] & ~Funct7D[3] & ~Funct7D[2];
|
||||
|
||||
//write select
|
||||
assign WriteSelD[2] = isDivSqrt & ~isFMA;
|
||||
assign WriteSelD[1] = isFMA | isCmp;
|
||||
//AND of Funct7 for sign
|
||||
assign WriteSelD[0] = isCmp | isSign;
|
||||
|
||||
//if op is div/sqrt - start div/sqrt
|
||||
assign DivSqrtStartD = isDivSqrt & ~isFMA;
|
||||
assign DivSqrtStartD = ~|FResultSelD; // is FResultSelD == 000
|
||||
|
||||
//operation control for each fp operation
|
||||
//has to be expanded over standard to account for
|
||||
@ -126,23 +140,74 @@ module fctrl (
|
||||
//
|
||||
//
|
||||
|
||||
//add/cvt chooses unsigned conversion here
|
||||
assign OpCtrlD[3] = (isAddSub & Rs2D[0]) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0);
|
||||
//add/cvt chooses FP/int or int/FP conversion
|
||||
assign OpCtrlD[2] = (isAddSub & (Funct7D[6] & Funct7D[5] & ~Funct7D[4])) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0);
|
||||
//compare chooses equals
|
||||
//sign chooses sgnjx
|
||||
//add/cvt can chooses between abs/neg functions, but they aren't used in the
|
||||
//wally-spec
|
||||
assign OpCtrlD[1] = (isAddSub & 1'b0) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & FrmW[2]) | (isSign & FrmW[1]);
|
||||
//divide chooses between div/sqrt
|
||||
//compare chooses between LT and LE
|
||||
//sign chooses between sgnj and sgnjn
|
||||
//add/cvt chooses between add/sub or single-precision conversion
|
||||
assign OpCtrlD[0] = (isAddSub & (Funct7D[2] | Funct7D[0])) | (isFMA & 1'b0) | (isDivSqrt & Funct7D[5]) | (isCmp & FrmW[1]) | (isSign & FrmW[0]);
|
||||
|
||||
|
||||
|
||||
always_comb begin
|
||||
IllegalFPUInstrD = 0;
|
||||
case (FResultSelD)
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
3'b000 : OpCtrlD = {3'b0, Funct7D[5]};
|
||||
// cmp
|
||||
// fmin = ?100
|
||||
// fmax = ?101
|
||||
// feq = ?010
|
||||
// flt = ?001
|
||||
// fle = ?011
|
||||
// {?, is min or max, is eq or le, is lt or le}
|
||||
3'b001 : OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])};
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
// fmsub = ?001
|
||||
// fnmadd = ?010
|
||||
// fnmsub = ?011
|
||||
// fmul = ?100
|
||||
// {?, is mul, is negative, is sub}
|
||||
3'b010 : OpCtrlD = {1'b0, OpD[4:2]};
|
||||
// sgn inj
|
||||
// fsgnj = ??00
|
||||
// fsgnjn = ??01
|
||||
// fsgnjx = ??10
|
||||
3'b011 : OpCtrlD = {2'b0, Funct3D[1:0]};
|
||||
// add/sub/cnvt
|
||||
// fadd = 0000
|
||||
// fsub = 0001
|
||||
// fcvt.w.s = 0100
|
||||
// fcvt.wu.s = 0101
|
||||
// fcvt.s.w = 0110
|
||||
// fcvt.s.wu = 0111
|
||||
// fcvt.s.d = 0010
|
||||
// fcvt.w.d = 1100
|
||||
// fcvt.wu.d = 1101
|
||||
// fcvt.d.w = 1110
|
||||
// fcvt.d.wu = 1111
|
||||
// fcvt.d.s = 1000
|
||||
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub
|
||||
3'b100 : OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])};
|
||||
// classify {?, ?, ?, ?}
|
||||
3'b101 : OpCtrlD = 4'b0;
|
||||
// output SrcAW
|
||||
// fmv.w.x = ???0
|
||||
// fmv.w.d = ???1
|
||||
3'b110 : OpCtrlD = {3'b0, Funct7D[0]};
|
||||
// output ReadData1
|
||||
// flw = ?000
|
||||
// fld = ?001
|
||||
// fsw = ?010
|
||||
// fsd = ?011
|
||||
// fmv.x.w = ?100
|
||||
// fmv.d.w = ?101
|
||||
// {?, is mv, is store, is double or fcvt.d.w}
|
||||
3'b111 : OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])};
|
||||
default : begin OpCtrlD = 4'bxxxx; IllegalFPUInstrD = isFP; end
|
||||
endcase
|
||||
end
|
||||
|
||||
//write to integer source if conv to int occurs
|
||||
//AND of Funct7 for int results
|
||||
assign WriteIntD = isCvt & (Funct7D[6] & Funct7D[5] & ~Funct7D[4] & ~Funct7D[3] & ~Funct7D[2] & ~Funct7D[1]);
|
||||
|
||||
// is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv
|
||||
assign WriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b001)&OpD[6]);
|
||||
// if not writing to int reg and not a store function and not move
|
||||
assign FRegWriteD = ~WriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]);
|
||||
endmodule
|
||||
|
@ -21,7 +21,7 @@ module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE);
|
||||
|
||||
output logic prodinfE; // X*Y larger than max possible
|
||||
|
||||
// If any input is NaN, propagate the NaN
|
||||
// If any input logic is NaN, propagate the NaN
|
||||
|
||||
assign nanE = xnanE || ynanE || znanE;
|
||||
|
||||
|
@ -13,19 +13,19 @@ module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof,
|
||||
inf, nanM, FmaFlagsM,sticky,prodinfM);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic xnanM; // X is NaN
|
||||
input logic ynanM; // Y is NaN
|
||||
input logic znanM; // Z is NaN
|
||||
input logic xnanM; // X is NaN
|
||||
input logic ynanM; // Y is NaN
|
||||
input logic znanM; // Z is NaN
|
||||
input logic xsign; // Sign of z
|
||||
input logic ysign; // Sign of z
|
||||
input logic zsign; // Sign of z
|
||||
input logic sticky; // X is Inf
|
||||
input logic prodinfM;
|
||||
input logic xinfM; // X is Inf
|
||||
input logic yinfM; // Y is Inf
|
||||
input logic zinfM; // Z is Inf
|
||||
input logic sumof; // X*Y + z underflows exponent
|
||||
input logic sumuf; // X*Y + z underflows exponent
|
||||
input logic ysign; // Sign of z
|
||||
input logic zsign; // Sign of z
|
||||
input logic sticky; // X is Inf
|
||||
input logic prodinfM;
|
||||
input logic xinfM; // X is Inf
|
||||
input logic yinfM; // Y is Inf
|
||||
input logic zinfM; // Z is Inf
|
||||
input logic sumof; // X*Y + z underflows exponent
|
||||
input logic sumuf; // X*Y + z underflows exponent
|
||||
input logic xzeroM; // x = 0
|
||||
input logic yzeroM; // y = 0
|
||||
input logic zzeroM; // y = 0
|
||||
@ -55,8 +55,8 @@ logic suminf;
|
||||
assign FmaFlagsM[2] = suminf && ~inf;
|
||||
|
||||
// Set the underflow flag for the following cases:
|
||||
// 1) Any input is denormalized
|
||||
// 2) Output would be denormalized or smaller
|
||||
// 1) Any input logic is denormalized
|
||||
// 2) output logic would be denormalized or smaller
|
||||
|
||||
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));
|
||||
|
||||
@ -70,7 +70,7 @@ logic suminf;
|
||||
// Set invalid flag for following cases:
|
||||
// 1) Inf - Inf
|
||||
// 2) 0 * Inf
|
||||
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
|
||||
// 3) output logic = NaN (this is not part of the IEEE spec, only 486 proj)
|
||||
|
||||
assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
|
||||
xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above
|
||||
|
@ -34,7 +34,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
|
||||
xinfE, yinfE, zinfE, nanE, prodinfE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//***clean up code, comment, fix names, and c3f000200003fffe * 0000000000000001 + 001ffffffffffffe error
|
||||
|
||||
input logic [63:0] ReadData1E; // input 1
|
||||
input logic [63:0] ReadData2E; // input 2
|
||||
input logic [63:0] ReadData3E; // input 3
|
||||
@ -42,7 +42,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
output logic [12:0] aligncntE; // status flags
|
||||
output logic [105:0] rE; // one result of partial product sum
|
||||
output logic [105:0] sE; // other result of partial products
|
||||
output logic [163:0] tE; // output of alignment shifter
|
||||
output logic [163:0] tE; // output logic of alignment shifter
|
||||
output logic [12:0] aeE; // multiplier expoent
|
||||
output logic bsE; // sticky bit of addend
|
||||
output logic killprodE; // ReadData3E >> product
|
||||
@ -65,7 +65,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
|
||||
// Internal nodes
|
||||
|
||||
// output [12:0] aligncntE; // shift count for alignment
|
||||
// output logic [12:0] aligncntE; // shift count for alignment
|
||||
|
||||
|
||||
logic prodof; // ReadData1E*ReadData2E out of range
|
||||
@ -95,7 +95,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
|
||||
special special(.*);
|
||||
|
||||
|
||||
// Instantiate control output
|
||||
// Instantiate control output logic
|
||||
|
||||
flag1 flag1(.*);
|
||||
|
||||
|
@ -15,13 +15,13 @@
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to ReadData1M or ReadData3M inputs
|
||||
// bypass Handles bypass of result to ReadData1M or ReadData3M input logics
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
// special Catch special cases (input logics = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals
|
||||
// the ReadData1M or ReadData3M input logics for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IMMM flags.
|
||||
@ -38,19 +38,19 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
|
||||
|
||||
);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [63:0] ReadData1M; // input 1
|
||||
input logic [63:0] ReadData2M; // input 2
|
||||
input logic [63:0] ReadData3M; // input 3
|
||||
input logic [2:0] FrmM; // Rounding mode
|
||||
input logic [12:0] aligncntM; // status flags
|
||||
input logic [105:0] rM; // one result of partial product sum
|
||||
input logic [105:0] sM; // other result of partial products
|
||||
input logic [163:0] tM; // output of alignment shifter
|
||||
input logic [8:0] normcntM; // shift count for normalizer
|
||||
input logic [12:0] aeM; // multiplier expoent
|
||||
input logic bsM; // sticky bit of addend
|
||||
input logic killprodM; // ReadData3M >> product
|
||||
|
||||
input logic [63:0] ReadData1M; // input logic 1
|
||||
input logic [63:0] ReadData2M; // input logic 2
|
||||
input logic [63:0] ReadData3M; // input logic 3
|
||||
input logic [2:0] FrmM; // Rounding mode
|
||||
input logic [12:0] aligncntM; // status flags
|
||||
input logic [105:0] rM; // one result of partial product sum
|
||||
input logic [105:0] sM; // other result of partial products
|
||||
input logic [163:0] tM; // output of alignment shifter
|
||||
input logic [8:0] normcntM; // shift count for normalizer
|
||||
input logic [12:0] aeM; // multiplier expoent
|
||||
input logic bsM; // sticky bit of addend
|
||||
input logic killprodM; // ReadData3M >> product
|
||||
input logic prodinfM;
|
||||
input logic xzeroM;
|
||||
input logic yzeroM;
|
||||
@ -65,10 +65,13 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
|
||||
input logic ynanM;
|
||||
input logic znanM;
|
||||
input logic nanM;
|
||||
input logic [8:0] sumshiftM;
|
||||
input logic [8:0] sumshiftM;
|
||||
input logic sumshiftzeroM;
|
||||
output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
|
||||
output logic [4:0] FmaFlagsM; // status flags
|
||||
|
||||
|
||||
output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
|
||||
output logic [4:0] FmaFlagsM; // status flags
|
||||
|
||||
|
||||
// Internal nodes
|
||||
logic [163:0] sum; // output of carry prop adder
|
||||
|
@ -84,13 +84,14 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
|
||||
wire [127:0] regr_out;
|
||||
wire [2:0] sel_muxa, sel_muxb;
|
||||
wire sel_muxr;
|
||||
wire load_rega, load_regb, load_regc, load_regd, load_regr;
|
||||
wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
|
||||
|
||||
wire donev, sel_muxrv, sel_muxsv;
|
||||
wire [1:0] sel_muxav, sel_muxbv;
|
||||
wire load_regav, load_regbv, load_regcv;
|
||||
wire load_regrv, load_regsv;
|
||||
|
||||
logic exp_cout1, exp_cout2, exp_odd, open;
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the original operands, the DivOpType , and their precision DivP.
|
||||
// Single precision inputs are converted to double precision
|
||||
@ -138,7 +139,7 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
|
||||
// FSM : control divider
|
||||
fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
|
||||
clk, reset, DivStart, error, DivOpType);
|
||||
clk, reset, DivStart, DivOpType);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. The rounding units also handles special cases and
|
||||
@ -191,6 +192,9 @@ module brent_kung (c, p, g);
|
||||
input [13:0] g;
|
||||
output [14:1] c;
|
||||
|
||||
logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
|
||||
logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
|
||||
logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
|
||||
// parallel-prefix, Brent-Kung
|
||||
|
||||
// Stage 1: Generates G/DivP pairs that span 1 bits
|
||||
|
@ -1,19 +1,22 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// `include "../../config/rv64icfd/wally-config.vh" //debug
|
||||
|
||||
module fpu (
|
||||
//input logic [2:0] FrmD,
|
||||
input logic [2:0] FRM_REGW, // Rounding mode from CSR
|
||||
input logic reset,
|
||||
//input logic clear, // *** what is this used for?
|
||||
//input logic clear, // *** not being used anywhere
|
||||
input logic clk,
|
||||
input logic [31:0] InstrD,
|
||||
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
|
||||
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
|
||||
input logic StallE, StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
output logic [4:0] SetFflagsM,
|
||||
output logic [31:0] FSROutW,
|
||||
output logic DivSqrtDoneE,
|
||||
output logic FInvalInstrD,
|
||||
output logic IllegalFPUInstrD,
|
||||
output logic [`XLEN-1:0] FPUResultW);
|
||||
|
||||
//NOTE:
|
||||
@ -45,12 +48,12 @@ module fpu (
|
||||
localparam PipeEnable = 1'b1;
|
||||
always_comb begin
|
||||
|
||||
PipeEnableDE = PipeEnable;
|
||||
PipeEnableEM = PipeEnable;
|
||||
PipeEnableMW = PipeEnable;
|
||||
PipeClearDE = PipeClear;
|
||||
PipeClearEM = PipeClear;
|
||||
PipeClearMW = PipeClear;
|
||||
PipeEnableDE = StallE;
|
||||
PipeEnableEM = StallM;
|
||||
PipeEnableMW = StallW;
|
||||
PipeClearDE = FlushE;
|
||||
PipeClearEM = FlushM;
|
||||
PipeClearMW = FlushW;
|
||||
|
||||
end
|
||||
|
||||
@ -63,33 +66,33 @@ module fpu (
|
||||
//
|
||||
|
||||
//wally-spec D stage control logic signal instantiation
|
||||
logic IllegalFPUInstrFaultD;
|
||||
logic FRegWriteD;
|
||||
logic [2:0] FResultSelD;
|
||||
logic [2:0] FrmD;
|
||||
logic PD;
|
||||
logic FmtD;
|
||||
logic DivSqrtStartD;
|
||||
logic [3:0] OpCtrlD;
|
||||
logic WriteIntD;
|
||||
|
||||
//top-level controller for FPU
|
||||
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Rs1D(InstrD[19:15]), .FrmW(InstrD[14:12]), .WriteEnD(FRegWriteD), .WriteSelD(FResultSelD), .FmtD(PD), .*);
|
||||
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
|
||||
|
||||
//instantiation of D stage regfile signals (includes some W stage signals
|
||||
//for easy reference)
|
||||
logic [2:0] FrmW;
|
||||
logic WriteEnW;
|
||||
logic FmtW;
|
||||
logic FRegWriteW;
|
||||
logic [4:0] RdW, Rs1D, Rs2D, Rs3D;
|
||||
logic [`XLEN-1:0] WriteDataW;
|
||||
logic [63:0] FPUResultDirW;
|
||||
logic [`XLEN-1:0] ReadData1D, ReadData2D, ReadData3D;
|
||||
|
||||
//regfile instantiation
|
||||
freg3adr fpregfile (FrmW, reset, PipeClear, clk, RdW, WriteEnW, Rs1D, Rs2D, Rs3D, WriteDataW, ReadData1D, ReadData2D, ReadData3D);
|
||||
|
||||
always_comb begin
|
||||
FrmW = InstrD[14:12];
|
||||
end
|
||||
freg3adr fpregfile (FmtW, reset, PipeClear, clk, RdW, FRegWriteW, InstrD[19:15], InstrD[24:20], InstrD[31:27], FPUResultDirW, ReadData1D, ReadData2D, ReadData3D);
|
||||
|
||||
//always_comb begin
|
||||
// FrmW = InstrD[14:12];
|
||||
//end
|
||||
//
|
||||
//END DECODE STAGE
|
||||
//#########################################
|
||||
@ -102,7 +105,7 @@ module fpu (
|
||||
logic FRegWriteE;
|
||||
logic [2:0] FResultSelE;
|
||||
logic [2:0] FrmE;
|
||||
logic PE;
|
||||
logic FmtE;
|
||||
logic DivSqrtStartE;
|
||||
logic [3:0] OpCtrlE;
|
||||
|
||||
@ -187,9 +190,10 @@ module fpu (
|
||||
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE);
|
||||
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
|
||||
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
|
||||
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, PD, PE);
|
||||
flopenrc #(4) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
|
||||
flopenrc #(1) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
|
||||
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
|
||||
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
|
||||
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
|
||||
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
|
||||
|
||||
//
|
||||
//END D/E PIPE
|
||||
@ -205,10 +209,10 @@ module fpu (
|
||||
fpdiv fpdivsqrt (.*);
|
||||
|
||||
//first of two-stage instance of floating-point add/cvt unit
|
||||
fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, AddOp1E, AddOp2E, AddRmE, AddOpTypeE, AddPE, AddOvEnE, AddUnEnE);
|
||||
fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, ReadData1E, ReadData2E, FrmE, OpCtrlE, FmtE);
|
||||
|
||||
//first of two-stage instance of floating-point comparator
|
||||
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, CmpOp1E, CmpOp2E, CmpSelE);
|
||||
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, ReadData1E, ReadData2E, OpCtrlE[1:0]);
|
||||
|
||||
//first and only instance of floating-point sign converter
|
||||
fpusgn fpsgn (.*);
|
||||
@ -221,33 +225,33 @@ module fpu (
|
||||
|
||||
//truncate to 64 bits
|
||||
//(causes warning during compilation - case never reached)
|
||||
if(`XLEN > 64) begin
|
||||
DivOp1 <= ReadData1E[`XLEN-1:`XLEN-64];
|
||||
DivOp2 <= ReadData2E[`XLEN-1:`XLEN-64];
|
||||
AddOp1E <= ReadData1E[`XLEN-1:`XLEN-64];
|
||||
AddOp2E <= ReadData2E[`XLEN-1:`XLEN-64];
|
||||
CmpOp1E <= ReadData1E[`XLEN-1:`XLEN-64];
|
||||
CmpOp2E <= ReadData2E[`XLEN-1:`XLEN-64];
|
||||
SgnOp1E <= ReadData1E[`XLEN-1:`XLEN-64];
|
||||
SgnOp2E <= ReadData2E[`XLEN-1:`XLEN-64];
|
||||
end
|
||||
//zero extend to 64 bits
|
||||
else begin
|
||||
DivOp1 <= {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
DivOp2 <= {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
AddOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
AddOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
CmpOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
CmpOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
SgnOp1E <= {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
SgnOp2E <= {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
end
|
||||
// if(`XLEN > 64) begin // ***KEP this isn't used and it causes a lint error
|
||||
// DivOp1 = ReadData1E[`XLEN-1:`XLEN-64];
|
||||
// DivOp2 = ReadData2E[`XLEN-1:`XLEN-64];
|
||||
// AddOp1E = ReadData1E[`XLEN-1:`XLEN-64];
|
||||
// AddOp2E = ReadData2E[`XLEN-1:`XLEN-64];
|
||||
// CmpOp1E = ReadData1E[`XLEN-1:`XLEN-64];
|
||||
// CmpOp2E = ReadData2E[`XLEN-1:`XLEN-64];
|
||||
// SgnOp1E = ReadData1E[`XLEN-1:`XLEN-64];
|
||||
// SgnOp2E = ReadData2E[`XLEN-1:`XLEN-64];
|
||||
// end
|
||||
// //zero extend to 64 bits
|
||||
// else begin
|
||||
// DivOp1 = {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
// DivOp2 = {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
// AddOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
// AddOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
// CmpOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
// CmpOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
// SgnOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
|
||||
// SgnOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
|
||||
// end
|
||||
|
||||
//assign op codes
|
||||
AddOpTypeE[3:0] <= OpCtrlE[3:0];
|
||||
CmpSelE[1:0] <= OpCtrlE[1:0];
|
||||
DivOpType <= OpCtrlE[0];
|
||||
SgnOpCodeE[1:0] <= OpCtrlE[1:0];
|
||||
AddOpTypeE[3:0] = OpCtrlE[3:0];
|
||||
CmpSelE[1:0] = OpCtrlE[1:0];
|
||||
DivOpType = OpCtrlE[0];
|
||||
SgnOpCodeE[1:0] = OpCtrlE[1:0];
|
||||
|
||||
end
|
||||
|
||||
@ -266,7 +270,7 @@ module fpu (
|
||||
logic FRegWriteM;
|
||||
logic [2:0] FResultSelM;
|
||||
logic [2:0] FrmM;
|
||||
logic PM;
|
||||
logic FmtM;
|
||||
logic [3:0] OpCtrlM;
|
||||
|
||||
//instantiate M stage FMA signals here ***rename fma signals and resize for XLEN
|
||||
@ -340,17 +344,17 @@ module fpu (
|
||||
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM);
|
||||
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM);
|
||||
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
|
||||
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
|
||||
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
|
||||
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM);
|
||||
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM);
|
||||
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM);
|
||||
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM);
|
||||
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM);
|
||||
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
|
||||
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
|
||||
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
|
||||
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM);
|
||||
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM);
|
||||
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
|
||||
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
|
||||
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
|
||||
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM);
|
||||
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM);
|
||||
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM);
|
||||
flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM);
|
||||
flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM);
|
||||
@ -414,8 +418,9 @@ module fpu (
|
||||
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM);
|
||||
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
|
||||
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
|
||||
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, PE, PM);
|
||||
flopenrc #(4) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
|
||||
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
|
||||
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
|
||||
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
|
||||
|
||||
//
|
||||
//END E/M PIPE
|
||||
@ -443,9 +448,7 @@ module fpu (
|
||||
//
|
||||
|
||||
//wally-spec W stage control logic signal instantiation
|
||||
logic FRegWriteW;
|
||||
logic [2:0] FResultSelW;
|
||||
logic PW;
|
||||
|
||||
//instantiate W stage fma signals here
|
||||
logic [63:0] FmaResultW;
|
||||
@ -470,9 +473,14 @@ module fpu (
|
||||
logic AddDenormW;
|
||||
|
||||
//instantiation of W stage cmp signals
|
||||
logic [63:0] CmpResultW;
|
||||
logic CmpInvalidW;
|
||||
logic [1:0] CmpFCCW;
|
||||
|
||||
//instantiation of W stage classify signals
|
||||
logic [63:0] ClassResultW;
|
||||
logic [4:0] ClassFlagsW;
|
||||
|
||||
//*****************
|
||||
//fma M/W pipe registers
|
||||
//*****************
|
||||
@ -510,7 +518,9 @@ module fpu (
|
||||
//*****************
|
||||
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW);
|
||||
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
|
||||
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, PM, PW);
|
||||
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
|
||||
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
|
||||
flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
|
||||
|
||||
////END M/W PIPE
|
||||
//*****************************************
|
||||
@ -527,21 +537,61 @@ module fpu (
|
||||
//set to cmp flags
|
||||
//iff bit one is low - if bit zero is active set to add/cvt flags - otherwise
|
||||
//set to div/sqrt flags
|
||||
assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
|
||||
(FResultSelW[1]) ?
|
||||
( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) )
|
||||
: ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) )
|
||||
);
|
||||
//assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
|
||||
// (FResultSelW[1]) ?
|
||||
// ( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) )
|
||||
// : ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) )
|
||||
// );
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
// div/sqrt
|
||||
3'b000 : FPUFlagsW = DivFlagsW;
|
||||
// cmp
|
||||
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
|
||||
//fma/mult
|
||||
3'b010 : FPUFlagsW = FmaFlagsW;
|
||||
// sgn inj
|
||||
3'b011 : FPUFlagsW = SgnFlagsW;
|
||||
// add/sub/cnvt
|
||||
3'b100 : FPUFlagsW = AddFlagsW;
|
||||
// classify
|
||||
3'b101 : FPUFlagsW = ClassFlagsW;
|
||||
// output SrcAW
|
||||
3'b110 : FPUFlagsW = 5'b0;
|
||||
// output ReadData1
|
||||
3'b111 : FPUFlagsW = 5'b0;
|
||||
default : FPUFlagsW = 5'bxxxxx;
|
||||
endcase
|
||||
end
|
||||
|
||||
//result mux via in-line ternaries
|
||||
logic [63:0] FPUResultDirW;
|
||||
//this uses the same logic as for the flag signals
|
||||
assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
|
||||
(FResultSelW[1]) ?
|
||||
( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) )
|
||||
: ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) )
|
||||
);
|
||||
|
||||
//assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
|
||||
// (FResultSelW[1]) ?
|
||||
// ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) )
|
||||
// : ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) )
|
||||
// );
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
// div/sqrt
|
||||
3'b000 : FPUResultDirW = DivResultW;
|
||||
// cmp
|
||||
3'b001 : FPUResultDirW = CmpResultW;
|
||||
//fma/mult
|
||||
3'b010 : FPUResultDirW = FmaResultW;
|
||||
// sgn inj
|
||||
3'b011 : FPUResultDirW = SgnResultW;
|
||||
// add/sub/cnvt
|
||||
3'b100 : FPUResultDirW = AddResultW;
|
||||
// classify
|
||||
3'b101 : FPUResultDirW = ClassResultW;
|
||||
// output SrcAW
|
||||
3'b110 : FPUResultDirW = SrcAW;
|
||||
// output ReadData1
|
||||
3'b111 : FPUResultDirW = ReadData1W;
|
||||
default : FPUResultDirW = {64{1'bx}};
|
||||
endcase
|
||||
end
|
||||
//interface between XLEN size datapath and double-precision sized
|
||||
//floating-point results
|
||||
//
|
||||
@ -555,11 +605,12 @@ module fpu (
|
||||
// Repetition multiplier must be constant.
|
||||
|
||||
//if(`XLEN > 64) begin
|
||||
// FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}};
|
||||
// FPUResultW = {FPUResultDirW,{XLENDIFF{1'b0}}};
|
||||
//end
|
||||
//truncate
|
||||
//else begin
|
||||
FPUResultW <= FPUResultDirW[63:64-`XLEN];
|
||||
FPUResultW = FPUResultDirW[63:64-`XLEN];
|
||||
SetFflagsM = FPUFlagsW;
|
||||
//end
|
||||
|
||||
end
|
||||
|
@ -27,18 +27,16 @@
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin, OvEn, UnEn);
|
||||
module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin);
|
||||
|
||||
input [63:0] op1; // 1st input operand (A)
|
||||
input [63:0] op2; // 2nd input operand (B)
|
||||
input [2:0] rm; // Rounding mode - specify values
|
||||
input [3:0] op_type; // Function opcode
|
||||
input Pin; // Result Precision (0 for double, 1 for single)
|
||||
input OvEn; // Overflow trap enabled
|
||||
input UnEn; // Underflow trap enabled
|
||||
input logic [63:0] op1; // 1st input operand (A)
|
||||
input logic [63:0] op2; // 2nd input operand (B)
|
||||
input logic [2:0] rm; // Rounding mode - specify values
|
||||
input logic [3:0] op_type; // Function opcode
|
||||
input logic Pin; // Result Precision (1 for double, 0 for single)
|
||||
|
||||
wire P;
|
||||
assign P = Pin | op_type[2];
|
||||
assign P = ~Pin | op_type[2];
|
||||
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
@ -56,23 +54,23 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
wire zeroB;
|
||||
wire [5:0] align_shift;
|
||||
|
||||
output [63:0] Float1;
|
||||
output [63:0] Float2;
|
||||
output [10:0] exponent;
|
||||
output [10:0] exponent_postsum;
|
||||
output [10:0] exp1_denorm, exp2_denorm;
|
||||
output [63:0] sum, sum_tc;
|
||||
output [3:0] sel_inv;
|
||||
output corr_sign;
|
||||
output signA;
|
||||
output op1_Norm, op2_Norm;
|
||||
output opA_Norm, opB_Norm;
|
||||
output Invalid;
|
||||
output DenormIn;
|
||||
// output exp_valid;
|
||||
output convert;
|
||||
output swap;
|
||||
output normal_overflow;
|
||||
output logic [63:0] Float1;
|
||||
output logic [63:0] Float2;
|
||||
output logic [10:0] exponent;
|
||||
output logic [10:0] exponent_postsum;
|
||||
output logic [11:0] exp1_denorm, exp2_denorm;//KEP used to be [10:0]
|
||||
output logic [63:0] sum, sum_tc;
|
||||
output logic [3:0] sel_inv;
|
||||
output logic corr_sign;
|
||||
output logic signA;
|
||||
output logic op1_Norm, op2_Norm;
|
||||
output logic opA_Norm, opB_Norm;
|
||||
output logic Invalid;
|
||||
output logic DenormIn;
|
||||
// output logic exp_valid;
|
||||
output logic convert;
|
||||
output logic swap;
|
||||
output logic normal_overflow;
|
||||
wire [5:0] ZP_mantissaA;
|
||||
wire [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
@ -129,15 +127,15 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
|
||||
|
||||
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
|
||||
assign exp1_denorm = swap ? (exp1 - ZP_mantissaB) : (exp1 - ZP_mantissaA);
|
||||
assign exp2_denorm = swap ? (exp2 - ZP_mantissaA) : (exp2 - ZP_mantissaB);
|
||||
assign exp1_denorm = swap ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
|
||||
assign exp2_denorm = swap ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
|
||||
|
||||
// Determine the alignment shift and limit it to 63. If any bit from
|
||||
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
|
||||
assign exp_shift = swap ? exp_diff2 : exp_diff1;
|
||||
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
|
||||
| exp_shift[8] | exp_shift[7] | exp_shift[6];
|
||||
assign align_shift = exp_shift | {6{exp_gt63}};
|
||||
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
|
||||
|
||||
// Unpack the 52-bit mantissas to 57-bit numbers of the form.
|
||||
// 001.M[51]M[50] ... M[1]M[0]00
|
||||
@ -193,7 +191,8 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
assign normal_overflow = (DenormIn & (sum == 16'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
|
||||
//***KEP used to be (sum == 16'h0) I am unsure what it's supposed to be
|
||||
assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -27,7 +27,7 @@
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
|
||||
module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
|
||||
|
||||
input [63:0] AddOp1M; // 1st input operand (A)
|
||||
input [63:0] AddOp2M; // 2nd input operand (B)
|
||||
@ -51,7 +51,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
input AddCorrSignM;
|
||||
input AddConvertM;
|
||||
input AddSwapM;
|
||||
input AddNormOvflowM;
|
||||
// input AddNormOvflowM;
|
||||
|
||||
output [63:0] AddResultM; // Result of operation
|
||||
output [4:0] AddFlagsM; // IEEE exception flags
|
||||
@ -80,6 +80,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
wire Float2_sum_tc_comp;
|
||||
wire normal_underflow;
|
||||
wire [63:0] sum_corr;
|
||||
logic AddNormOvflowM;
|
||||
|
||||
//AddExponentM value pre-rounding with considerations for denormalized
|
||||
//cases/conversion cases
|
||||
@ -116,7 +117,8 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
|
||||
|
||||
// Finds normal underflow result to determine whether to round final AddExponentM down
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 16'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
//KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
|
@ -1,8 +1,9 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// `include "../../config/rv64icfd/wally-config.vh" //debug
|
||||
|
||||
module freg1adr (
|
||||
input logic [2:0] frm,
|
||||
input logic FmtW,
|
||||
input logic reset,
|
||||
input logic clear,
|
||||
input logic clk,
|
||||
@ -13,7 +14,7 @@ module freg1adr (
|
||||
output logic [`XLEN-1:0] readData);
|
||||
|
||||
//note - not word aligning based on precision of
|
||||
//operation (frm)
|
||||
//operation (FmtW)
|
||||
|
||||
//reg number should remain static, but it doesn't hurt
|
||||
//to parameterize
|
||||
@ -139,7 +140,7 @@ endmodule
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module freg2adr (
|
||||
input logic [2:0] frm,
|
||||
input logic FmtW,
|
||||
input logic reset,
|
||||
input logic clear,
|
||||
input logic clk,
|
||||
@ -152,7 +153,7 @@ module freg2adr (
|
||||
output logic [`XLEN-1:0] readData2);
|
||||
|
||||
//note - not word aligning based on precision of
|
||||
//operation (frm)
|
||||
//operation (FmtW)
|
||||
|
||||
//reg number should remain static, but it doesn't hurt
|
||||
//to parameterize
|
||||
@ -310,7 +311,7 @@ endmodule
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module freg3adr (
|
||||
input logic [2:0] frm,
|
||||
input logic FmtW,
|
||||
input logic reset,
|
||||
input logic clear,
|
||||
input logic clk,
|
||||
@ -325,7 +326,7 @@ module freg3adr (
|
||||
output logic [`XLEN-1:0] readData3);
|
||||
|
||||
//note - not word aligning based on precision of
|
||||
//operation (frm)
|
||||
//operation (FmtW)
|
||||
|
||||
//reg number should remain static, but it doesn't hurt
|
||||
//to parameterize
|
||||
|
@ -1,12 +1,12 @@
|
||||
module fsm (done, load_rega, load_regb, load_regc,
|
||||
load_regd, load_regr, load_regs,
|
||||
sel_muxa, sel_muxb, sel_muxr,
|
||||
clk, reset, start, error, op_type);
|
||||
clk, reset, start, op_type);
|
||||
|
||||
input clk;
|
||||
input reset;
|
||||
input start;
|
||||
input error;
|
||||
// input error;
|
||||
input op_type;
|
||||
|
||||
output done;
|
||||
@ -50,9 +50,9 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
always @(posedge clk)
|
||||
begin
|
||||
if(reset==1'b1)
|
||||
CURRENT_STATE<=S0;
|
||||
CURRENT_STATE=S0;
|
||||
else
|
||||
CURRENT_STATE<=NEXT_STATE;
|
||||
CURRENT_STATE=NEXT_STATE;
|
||||
end
|
||||
|
||||
always @(*)
|
||||
@ -72,7 +72,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S0;
|
||||
NEXT_STATE = S0;
|
||||
end
|
||||
else if (start==1'b1 && op_type==1'b0)
|
||||
begin
|
||||
@ -86,7 +86,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b001;
|
||||
sel_muxb = 3'b001;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S1;
|
||||
NEXT_STATE = S1;
|
||||
end // if (start==1'b1 && op_type==1'b0)
|
||||
else if (start==1'b1 && op_type==1'b1)
|
||||
begin
|
||||
@ -100,7 +100,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b010;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S13;
|
||||
NEXT_STATE = S13;
|
||||
end
|
||||
end // case: S0
|
||||
S1:
|
||||
@ -115,7 +115,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b010;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S2;
|
||||
NEXT_STATE = S2;
|
||||
end
|
||||
S2: // iteration 1
|
||||
begin
|
||||
@ -129,7 +129,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b011;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S3;
|
||||
NEXT_STATE = S3;
|
||||
end
|
||||
S3:
|
||||
begin
|
||||
@ -143,7 +143,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b010;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S4;
|
||||
NEXT_STATE = S4;
|
||||
end
|
||||
S4: // iteration 2
|
||||
begin
|
||||
@ -157,7 +157,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b011;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S5;
|
||||
NEXT_STATE = S5;
|
||||
end
|
||||
S5:
|
||||
begin
|
||||
@ -171,7 +171,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b010;
|
||||
sel_muxr = 1'b0; // add
|
||||
NEXT_STATE <= S6;
|
||||
NEXT_STATE = S6;
|
||||
end
|
||||
S6: // iteration 3
|
||||
begin
|
||||
@ -185,7 +185,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b011;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S8;
|
||||
NEXT_STATE = S8;
|
||||
end
|
||||
S7:
|
||||
begin
|
||||
@ -199,7 +199,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b010;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S8;
|
||||
NEXT_STATE = S8;
|
||||
end // case: S7
|
||||
S8: // q,qm,qp
|
||||
begin
|
||||
@ -213,7 +213,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S9;
|
||||
NEXT_STATE = S9;
|
||||
end
|
||||
S9: // rem
|
||||
begin
|
||||
@ -227,7 +227,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b1;
|
||||
NEXT_STATE <= S10;
|
||||
NEXT_STATE = S10;
|
||||
end
|
||||
S10: // done
|
||||
begin
|
||||
@ -241,7 +241,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S0;
|
||||
NEXT_STATE = S0;
|
||||
end
|
||||
S13: // start of sqrt path
|
||||
begin
|
||||
@ -255,7 +255,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b010;
|
||||
sel_muxb = 3'b001;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S14;
|
||||
NEXT_STATE = S14;
|
||||
end
|
||||
S14:
|
||||
begin
|
||||
@ -269,7 +269,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b001;
|
||||
sel_muxb = 3'b100;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S15;
|
||||
NEXT_STATE = S15;
|
||||
end
|
||||
S15: // iteration 1
|
||||
begin
|
||||
@ -283,7 +283,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b011;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S16;
|
||||
NEXT_STATE = S16;
|
||||
end
|
||||
S16:
|
||||
begin
|
||||
@ -297,7 +297,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S17;
|
||||
NEXT_STATE = S17;
|
||||
end
|
||||
S17:
|
||||
begin
|
||||
@ -311,7 +311,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b100;
|
||||
sel_muxb = 3'b010;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S18;
|
||||
NEXT_STATE = S18;
|
||||
end
|
||||
S18: // iteration 2
|
||||
begin
|
||||
@ -325,7 +325,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b011;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S19;
|
||||
NEXT_STATE = S19;
|
||||
end
|
||||
S19:
|
||||
begin
|
||||
@ -339,7 +339,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S20;
|
||||
NEXT_STATE = S20;
|
||||
end
|
||||
S20:
|
||||
begin
|
||||
@ -353,7 +353,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b100;
|
||||
sel_muxb = 3'b010;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S21;
|
||||
NEXT_STATE = S21;
|
||||
end
|
||||
S21: // iteration 3
|
||||
begin
|
||||
@ -367,7 +367,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b011;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S22;
|
||||
NEXT_STATE = S22;
|
||||
end
|
||||
S22:
|
||||
begin
|
||||
@ -381,7 +381,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b011;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S23;
|
||||
NEXT_STATE = S23;
|
||||
end
|
||||
S23:
|
||||
begin
|
||||
@ -395,7 +395,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b100;
|
||||
sel_muxb = 3'b010;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S24;
|
||||
NEXT_STATE = S24;
|
||||
end
|
||||
S24: // q,qm,qp
|
||||
begin
|
||||
@ -409,7 +409,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S25;
|
||||
NEXT_STATE = S25;
|
||||
end
|
||||
S25: // rem
|
||||
begin
|
||||
@ -423,7 +423,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b011;
|
||||
sel_muxb = 3'b110;
|
||||
sel_muxr = 1'b1;
|
||||
NEXT_STATE <= S26;
|
||||
NEXT_STATE = S26;
|
||||
end
|
||||
S26: // done
|
||||
begin
|
||||
@ -437,7 +437,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S0;
|
||||
NEXT_STATE = S0;
|
||||
end
|
||||
default:
|
||||
begin
|
||||
@ -451,7 +451,7 @@ module fsm (done, load_rega, load_regb, load_regc,
|
||||
sel_muxa = 3'b000;
|
||||
sel_muxb = 3'b000;
|
||||
sel_muxr = 1'b0;
|
||||
NEXT_STATE <= S0;
|
||||
NEXT_STATE = S0;
|
||||
end
|
||||
endcase // case(CURRENT_STATE)
|
||||
end // always @ (CURRENT_STATE or X)
|
||||
|
@ -30,6 +30,56 @@ module ladner_fischer128 (c, p, g);
|
||||
input [127:0] g;
|
||||
|
||||
output [128:1] c;
|
||||
|
||||
|
||||
logic G_1_0, G_3_2, P_3_2, G_5_4, P_5_4, G_7_6, P_7_6, G_9_8, P_9_8, G_11_10, P_11_10, G_13_12
|
||||
, P_13_12, G_15_14, P_15_14, G_17_16, P_17_16, G_19_18, P_19_18, G_21_20, P_21_20, G_23_22
|
||||
, P_23_22, G_25_24, P_25_24, G_27_26, P_27_26, G_29_28, P_29_28, G_31_30, P_31_30, G_33_32
|
||||
, P_33_32, G_35_34, P_35_34, G_37_36, P_37_36, G_39_38, P_39_38, G_41_40, P_41_40, G_43_42
|
||||
, P_43_42, G_45_44, P_45_44, G_47_46, P_47_46, G_49_48, P_49_48, G_51_50, P_51_50, G_53_52
|
||||
, P_53_52, G_55_54, P_55_54, G_57_56, P_57_56, G_59_58, P_59_58, G_61_60, P_61_60, G_63_62
|
||||
, P_63_62, G_65_64, P_65_64, G_67_66, P_67_66, G_69_68, P_69_68, G_71_70, P_71_70, G_73_72
|
||||
, P_73_72, G_75_74, P_75_74, G_77_76, P_77_76, G_79_78, P_79_78, G_81_80, P_81_80, G_83_82
|
||||
, P_83_82, G_85_84, P_85_84, G_87_86, P_87_86, G_89_88, P_89_88, G_91_90, P_91_90, G_93_92
|
||||
, P_93_92, G_95_94, P_95_94, G_97_96, P_97_96, G_99_98, P_99_98, G_101_100, P_101_100, G_103_102
|
||||
, P_103_102, G_105_104, P_105_104, G_107_106, P_107_106, G_109_108, P_109_108, G_111_110, P_111_110
|
||||
, G_113_112, P_113_112, G_115_114, P_115_114, G_117_116, P_117_116, G_119_118, P_119_118, G_121_120
|
||||
, P_121_120, G_123_122, P_123_122, G_125_124, P_125_124, G_127_126, P_127_126, G_3_0, G_7_4, P_7_4
|
||||
, G_11_8, P_11_8, G_15_12, P_15_12, G_19_16, P_19_16, G_23_20, P_23_20, G_27_24, P_27_24, G_31_28
|
||||
, P_31_28, G_35_32, P_35_32, G_39_36, P_39_36, G_43_40, P_43_40, G_47_44, P_47_44, G_51_48, P_51_48
|
||||
, G_55_52, P_55_52, G_59_56, P_59_56, G_63_60, P_63_60, G_67_64, P_67_64, G_71_68, P_71_68, G_75_72
|
||||
, P_75_72, G_79_76, P_79_76, G_83_80, P_83_80, G_87_84, P_87_84, G_91_88, P_91_88, G_95_92, P_95_92
|
||||
, G_99_96, P_99_96, G_103_100, P_103_100, G_107_104, P_107_104, G_111_108, P_111_108, G_115_112
|
||||
, P_115_112, G_119_116, P_119_116, G_123_120, P_123_120, G_127_124, P_127_124, G_5_0, G_7_0, G_13_8
|
||||
, P_13_8, G_15_8, P_15_8, G_21_16, P_21_16, G_23_16, P_23_16, G_29_24, P_29_24, G_31_24, P_31_24
|
||||
, G_37_32, P_37_32, G_39_32, P_39_32, G_45_40, P_45_40, G_47_40, P_47_40, G_53_48, P_53_48, G_55_48
|
||||
, P_55_48, G_61_56, P_61_56, G_63_56, P_63_56, G_69_64, P_69_64, G_71_64, P_71_64, G_77_72, P_77_72
|
||||
, G_79_72, P_79_72, G_85_80, P_85_80, G_87_80, P_87_80, G_93_88, P_93_88, G_95_88, P_95_88, G_101_96
|
||||
, P_101_96, G_103_96, P_103_96, G_109_104, P_109_104, G_111_104, P_111_104, G_117_112, P_117_112
|
||||
, G_119_112, P_119_112, G_125_120, P_125_120, G_127_120, P_127_120, G_9_0, G_11_0, G_13_0, G_15_0, G_25_16
|
||||
, P_25_16, G_27_16, P_27_16, G_29_16, P_29_16, G_31_16, P_31_16, G_41_32, P_41_32, G_43_32, P_43_32, G_45_32
|
||||
, P_45_32, G_47_32, P_47_32, G_57_48, P_57_48, G_59_48, P_59_48, G_61_48, P_61_48, G_63_48, P_63_48, G_73_64
|
||||
, P_73_64, G_75_64, P_75_64, G_77_64, P_77_64, G_79_64, P_79_64, G_89_80, P_89_80, G_91_80, P_91_80
|
||||
, G_93_80, P_93_80, G_95_80, P_95_80, G_105_96, P_105_96, G_107_96, P_107_96, G_109_96, P_109_96
|
||||
, G_111_96, P_111_96, G_121_112, P_121_112, G_123_112, P_123_112, G_125_112, P_125_112, G_127_112
|
||||
, P_127_112, G_17_0, G_19_0, G_21_0, G_23_0, G_25_0, G_27_0, G_29_0, G_31_0, G_49_32, P_49_32, G_51_32
|
||||
, P_51_32, G_53_32, P_53_32, G_55_32, P_55_32, G_57_32, P_57_32, G_59_32, P_59_32, G_61_32, P_61_32
|
||||
, G_63_32, P_63_32, G_81_64, P_81_64, G_83_64, P_83_64, G_85_64, P_85_64, G_87_64, P_87_64, G_89_64, P_89_64
|
||||
, G_91_64, P_91_64, G_93_64, P_93_64, G_95_64, P_95_64, G_113_96, P_113_96, G_115_96, P_115_96
|
||||
, G_117_96, P_117_96, G_119_96, P_119_96, G_121_96, P_121_96, G_123_96, P_123_96, G_125_96, P_125_96
|
||||
, G_127_96, P_127_96, G_33_0, G_35_0, G_37_0, G_39_0, G_41_0, G_43_0, G_45_0, G_47_0, G_49_0, G_51_0
|
||||
, G_53_0, G_55_0, G_57_0, G_59_0, G_61_0, G_63_0, G_97_64, P_97_64, G_99_64, P_99_64, G_101_64, P_101_64
|
||||
, G_103_64, P_103_64, G_105_64, P_105_64, G_107_64, P_107_64, G_109_64, P_109_64, G_111_64, P_111_64
|
||||
, G_113_64, P_113_64, G_115_64, P_115_64, G_117_64, P_117_64, G_119_64, P_119_64, G_121_64, P_121_64
|
||||
, G_123_64, P_123_64, G_125_64, P_125_64, G_127_64, P_127_64, G_65_0, G_67_0, G_69_0, G_71_0, G_73_0
|
||||
, G_75_0, G_77_0, G_79_0, G_81_0, G_83_0, G_85_0, G_87_0, G_89_0, G_91_0, G_93_0, G_95_0, G_97_0
|
||||
, G_99_0, G_101_0, G_103_0, G_105_0, G_107_0, G_109_0, G_111_0, G_113_0, G_115_0, G_117_0, G_119_0
|
||||
, G_121_0, G_123_0, G_125_0, G_127_0, G_2_0, G_4_0, G_6_0, G_8_0, G_10_0, G_12_0, G_14_0, G_16_0
|
||||
, G_18_0, G_20_0, G_22_0, G_24_0, G_26_0, G_28_0, G_30_0, G_32_0, G_34_0, G_36_0, G_38_0, G_40_0
|
||||
, G_42_0, G_44_0, G_46_0, G_48_0, G_50_0, G_52_0, G_54_0, G_56_0, G_58_0, G_60_0, G_62_0, G_64_0
|
||||
, G_66_0, G_68_0, G_70_0, G_72_0, G_74_0, G_76_0, G_78_0, G_80_0, G_82_0, G_84_0, G_86_0, G_88_0
|
||||
, G_90_0, G_92_0, G_94_0, G_96_0, G_98_0, G_100_0, G_102_0, G_104_0, G_106_0, G_108_0, G_110_0, G_112_0
|
||||
, G_114_0, G_116_0, G_118_0, G_120_0, G_122_0, G_124_0, G_126_0;
|
||||
|
||||
// parallel-prefix, Ladner-Fischer
|
||||
|
||||
|
@ -29,6 +29,22 @@ module ladner_fischer64 (c, p, g);
|
||||
|
||||
output [64:1] c;
|
||||
|
||||
logic G_1_0,G_3_2,P_3_2,G_5_4,P_5_4,G_7_6,P_7_6,G_9_8,P_9_8,G_11_10,P_11_10,G_13_12,P_13_12,G_15_14,P_15_14
|
||||
,G_17_16,P_17_16,G_19_18,P_19_18,G_21_20,P_21_20,G_23_22,P_23_22,G_25_24,P_25_24,G_27_26,P_27_26,G_29_28,P_29_28
|
||||
,G_31_30,P_31_30,G_33_32,P_33_32,G_35_34,P_35_34,G_37_36,P_37_36,G_39_38,P_39_38,G_41_40,P_41_40,G_43_42,P_43_42
|
||||
,G_45_44,P_45_44,G_47_46,P_47_46,G_49_48,P_49_48,G_51_50,P_51_50,G_53_52,P_53_52,G_55_54,P_55_54,G_57_56,P_57_56
|
||||
,G_59_58,P_59_58,G_61_60,P_61_60,G_63_62,P_63_62,G_3_0,G_7_4,P_7_4,G_11_8,P_11_8,G_15_12,P_15_12,G_19_16,P_19_16
|
||||
,G_23_20,P_23_20,G_27_24,P_27_24,G_31_28,P_31_28,G_35_32,P_35_32,G_39_36,P_39_36,G_43_40,P_43_40,G_47_44,P_47_44
|
||||
,G_51_48,P_51_48,G_55_52,P_55_52,G_59_56,P_59_56,G_63_60,P_63_60,G_5_0,G_7_0,G_13_8,P_13_8,G_15_8,P_15_8,G_21_16
|
||||
,P_21_16,G_23_16,P_23_16,G_29_24,P_29_24,G_31_24,P_31_24,G_37_32,P_37_32,G_39_32,P_39_32,G_45_40,P_45_40,G_47_40
|
||||
,P_47_40,G_53_48,P_53_48,G_55_48,P_55_48,G_61_56,P_61_56,G_63_56,P_63_56,G_9_0,G_11_0,G_13_0,G_15_0,G_25_16
|
||||
,P_25_16,G_27_16,P_27_16,G_29_16,P_29_16,G_31_16,P_31_16,G_41_32,P_41_32,G_43_32,P_43_32,G_45_32,P_45_32,G_47_32
|
||||
,P_47_32,G_57_48,P_57_48,G_59_48,P_59_48,G_61_48,P_61_48,G_63_48,P_63_48,G_17_0,G_19_0,G_21_0,G_23_0,G_25_0,G_27_0
|
||||
,G_29_0,G_31_0,G_49_32,P_49_32,G_51_32,P_51_32,G_53_32,P_53_32,G_55_32,P_55_32,G_57_32,P_57_32,G_59_32,P_59_32
|
||||
,G_61_32,P_61_32,G_63_32,P_63_32,G_33_0,G_35_0,G_37_0,G_39_0,G_41_0,G_43_0,G_45_0,G_47_0,G_49_0,G_51_0,G_53_0
|
||||
,G_55_0,G_57_0,G_59_0,G_61_0,G_63_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0,G_14_0,G_16_0,G_18_0,G_20_0,G_22_0
|
||||
,G_24_0,G_26_0,G_28_0,G_30_0,G_32_0,G_34_0,G_36_0,G_38_0,G_40_0,G_42_0,G_44_0,G_46_0,G_48_0,G_50_0,G_52_0
|
||||
,G_54_0,G_56_0,G_58_0,G_60_0,G_62_0;
|
||||
// parallel-prefix, Ladner-Fischer
|
||||
|
||||
// Stage 1: Generates G/P pairs that span 1 bits
|
||||
|
@ -19,7 +19,7 @@ module lza(sum, normcnt, sumzero);
|
||||
// Internal nodes
|
||||
|
||||
reg [8:0] i; // loop index
|
||||
|
||||
|
||||
// A real LOP uses a fast carry chain to find only the first 0.
|
||||
// It is an example of a parallel prefix algorithm. For the sake
|
||||
// of simplicity, this model is behavioral instead.
|
||||
|
@ -240,6 +240,7 @@ module multiplier( y, x, Sum, Carry );
|
||||
|
||||
// Below are the nets for the partial products (booth)
|
||||
wire pp_0_0;
|
||||
wire pp_0_1;
|
||||
wire pp_0_2;
|
||||
wire pp_1_2;
|
||||
wire pp_0_3;
|
||||
|
@ -16,17 +16,18 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
|
||||
wire [26:0][1:0] add1;
|
||||
wire [26:0][54:0] pp;
|
||||
wire [26:0] e;
|
||||
logic [17:0][105:0] lv1add;
|
||||
logic [11:0][105:0] lv2add;
|
||||
logic [7:0][105:0] lv3add;
|
||||
logic [3:0][105:0] lv4add;
|
||||
logic [21:0][106:0] carryTmp;
|
||||
wire [26:0][105:0] acc;
|
||||
logic [106:0] tmpsE;
|
||||
logic [17:0][106:0] lv1add;
|
||||
logic [11:0][106:0] lv2add;
|
||||
logic [7:0][106:0] lv3add;
|
||||
logic [3:0][106:0] lv4add;
|
||||
logic [21:0][107:0] carryTmp;
|
||||
wire [26:0][106:0] acc;
|
||||
// wire [105:0] acc
|
||||
genvar i;
|
||||
|
||||
assign xExt = {2'b0,~(xdenormE|xzeroE),xman};
|
||||
assign yExt = {2'b0,~(ydenormE|yzeroE),yman, 1'b0};
|
||||
assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
|
||||
assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
|
||||
|
||||
generate
|
||||
for(i=0; i<27; i=i+1) begin
|
||||
@ -35,69 +36,70 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
|
||||
endgenerate
|
||||
|
||||
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
|
||||
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]};
|
||||
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0};
|
||||
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0};
|
||||
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0};
|
||||
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0};
|
||||
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0};
|
||||
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0};
|
||||
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0};
|
||||
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0};
|
||||
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0};
|
||||
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0};
|
||||
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0};
|
||||
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0};
|
||||
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0};
|
||||
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0};
|
||||
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0};
|
||||
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0};
|
||||
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0};
|
||||
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0};
|
||||
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0};
|
||||
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0};
|
||||
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0};
|
||||
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0};
|
||||
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0};
|
||||
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0};
|
||||
assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
|
||||
assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
|
||||
assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
|
||||
assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
|
||||
assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
|
||||
assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
|
||||
assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
|
||||
assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
|
||||
assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
|
||||
assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
|
||||
assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
|
||||
assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
|
||||
assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
|
||||
assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
|
||||
assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
|
||||
assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
|
||||
assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
|
||||
assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
|
||||
assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
|
||||
assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
|
||||
assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
|
||||
assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
|
||||
assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
|
||||
assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
|
||||
assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
|
||||
assign acc[26] = {pp[26],add1[25], 50'b0};
|
||||
|
||||
//*** resize adders
|
||||
generate
|
||||
for(i=0; i<9; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
|
||||
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1]));
|
||||
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0};
|
||||
add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
|
||||
.carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
|
||||
assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<6; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
|
||||
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1]));
|
||||
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0};
|
||||
add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
|
||||
.carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
|
||||
assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
generate
|
||||
for(i=0; i<4; i=i+1) begin
|
||||
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
|
||||
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1]));
|
||||
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0};
|
||||
add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
|
||||
.carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
|
||||
assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
generate
|
||||
for(i=0; i<2; i=i+1) begin
|
||||
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
|
||||
add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
|
||||
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
|
||||
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0};
|
||||
assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
|
||||
.carry(carryTmp[21]), .sum(sE));
|
||||
add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
|
||||
.carry(carryTmp[21]), .sum(tmpsE));
|
||||
assign sE = tmpsE[105:0];
|
||||
assign rE = {carryTmp[21][104:0], 1'b0};
|
||||
// assign rE = 0;
|
||||
// assign sE = acc[0] +
|
||||
@ -131,3 +133,4 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
|
||||
// assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
|
||||
// assign rE = 0;
|
||||
endmodule
|
||||
|
||||
|
@ -56,8 +56,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
// The sticky bit calculation is actually built into the shifter and
|
||||
// does not require a true subtraction shown in the model.
|
||||
|
||||
assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2;
|
||||
assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
|
||||
assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
|
||||
// assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
|
||||
always_comb
|
||||
begin
|
||||
// d = aligncntM
|
||||
@ -65,19 +65,19 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
// p = 53
|
||||
// ea + eb = aeM
|
||||
// set d<=2 to d<=0
|
||||
if ($signed(aligncntM)<=$signed(2)) begin //d<=2
|
||||
if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
|
||||
// product anchored or cancellation
|
||||
if ($signed(aeM-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin
|
||||
if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
|
||||
//normal result
|
||||
de0 = xzeroM|yzeroM ? zexp : aeM-normcnt+xdenormM+ydenormM+57;
|
||||
de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
|
||||
resultdenorm = |sum & ~|de0 | de0[12];
|
||||
// if z is zero then there was a 56 bit shift of the product
|
||||
sumshifted = resultdenorm ? sum << sumshiftM-zzeroM+isShiftLeft1 : sum << normcnt; // p+2+l
|
||||
sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
//de0 = aeM-normcnt+2-1023;
|
||||
end else begin
|
||||
sumshifted = sum << (1080+aeM);
|
||||
sumshifted = sum << (13'd1080+aeM);
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
resultdenorm = 1;
|
||||
@ -96,29 +96,29 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
// the book says exp = zexp + {-1,0,1}
|
||||
if(sumshiftzeroM) begin
|
||||
v = sum[162:109];
|
||||
sticky = sum[108:0] | bsM;
|
||||
de0 = zexp;
|
||||
sticky = (|sum[108:0]) | bsM;
|
||||
de0 = {2'b0,zexp};
|
||||
end else if(sumshifted[163] & ~sumshifttmp[9])begin
|
||||
v = sumshifted[162:109];
|
||||
sticky = (|sumshifted[108:0]) | bsM;
|
||||
de0 = zexp +2;
|
||||
de0 = {2'b0,zexp} +13'd2;
|
||||
end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
|
||||
v = sumshifted[161:108];
|
||||
sticky = (|sumshifted[107:0]) | bsM;
|
||||
de0 = zexp+1;
|
||||
de0 = {2'b0,zexp}+13'd1;
|
||||
end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp+zdenormM;
|
||||
de0 = {2'b0,zexp}+{12'b0,zdenormM};
|
||||
end else if(sumshifted[160]& ~zdenormM) begin
|
||||
de0 = zexp-1;
|
||||
de0 = {2'b0,zexp}-13'b1;
|
||||
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
|
||||
sticky = (|sumshifted[105:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
end else if(sumshifted[159]& ~zdenormM) begin
|
||||
//v = sumshifted[158:105];
|
||||
de0 = zexp-2;
|
||||
de0 = {2'b0,zexp}-13'd2;
|
||||
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
|
||||
sticky = (|sumshifted[104:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
@ -126,7 +126,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
v = sumshifted[160:107];
|
||||
sticky = (|sumshifted[106:0]) | bsM;
|
||||
//de0 = zexp-1;
|
||||
de0 = zexp;
|
||||
de0 = {{2{zexp[62]}},zexp};
|
||||
end else begin
|
||||
de0 = 0;
|
||||
sumshifted = sum << sumshiftM-1; // p+2+l
|
||||
@ -144,3 +144,4 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
// Date: 11/2/1995
|
||||
//
|
||||
// Block Description:
|
||||
// This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay.
|
||||
// This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay.
|
||||
//
|
||||
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
|
||||
// muxed in to form the actual result for register file writeback. This
|
||||
@ -24,14 +24,14 @@ module round(v, sticky, FrmM, wsign,
|
||||
input logic [2:0] FrmM;
|
||||
input logic wsign; // Sign of result
|
||||
input logic [4:0] FmaFlagsM;
|
||||
input logic inf; // Some input is infinity
|
||||
input logic nanM; // Some input is NaN
|
||||
input logic inf; // Some input logic is infinity
|
||||
input logic nanM; // Some input logic is NaN
|
||||
input logic xnanM; // X is NaN
|
||||
input logic ynanM; // Y is NaN
|
||||
input logic znanM; // Z is NaN
|
||||
input logic [51:0] xman; // Input X
|
||||
input logic [51:0] yman; // Input Y
|
||||
input logic [51:0] zman; // Input Z
|
||||
input logic [51:0] xman; // input logic X
|
||||
input logic [51:0] yman; // input logic Y
|
||||
input logic [51:0] zman; // input logic Z
|
||||
output logic [51:0] wman; // rounded result of FMAC
|
||||
output logic infinity; // Generate infinity on overflow
|
||||
output logic specialsel; // Select special result
|
||||
@ -85,7 +85,7 @@ module round(v, sticky, FrmM, wsign,
|
||||
// The special result mux is a 4:1 mux that should not appear in the
|
||||
// critical path of the machine. It is not priority encoded, despite
|
||||
// the code below suggesting otherwise. Also, several of the identical data
|
||||
// inputs to the wide muxes can be combined at the expense of more
|
||||
// inputs to the wide muxes can be combined at the expense of more
|
||||
// complicated non-critical control in the circuit implementation.
|
||||
|
||||
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
|
||||
@ -102,15 +102,15 @@ module round(v, sticky, FrmM, wsign,
|
||||
assign infinityres = infinity ? 52'b0 : {52{1'b1}};
|
||||
|
||||
// Invalid operations produce a quiet NaN. The result should
|
||||
// propagate an input if the input is NaN. Since we assume all
|
||||
// NaN inputs are already quiet, we don't have to force them quiet.
|
||||
// propagate an input if the input is NaN. Since we assume all
|
||||
// NaN inputs are already quiet, we don't have to force them quiet.
|
||||
|
||||
// assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original
|
||||
|
||||
// IEEE 754-2008 section 6.2.3 states:
|
||||
// "If two or more inputs are NaN, then the payload of the resulting NaN should be
|
||||
// identical to the payload of one of the input NaNs if representable in the destination
|
||||
// format. This standard does not specify which of the input NaNs will provide the payload."
|
||||
// "If two or more inputs are NaN, then the payload of the resulting NaN should be
|
||||
// identical to the payload of one of the input NaNs if representable in the destination
|
||||
// format. This standard does not specify which of the input NaNs will provide the payload."
|
||||
assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
|
||||
|
||||
// Select result with 4:1 mux
|
||||
|
@ -238,7 +238,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) )
|
||||
: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) )
|
||||
) :
|
||||
(op_type[3]) ? exp_A_unmodified : Rexp;
|
||||
(op_type[3]) ? exp_A_unmodified[10:0] : Rexp; //KEP used to be all of exp_A_unmodified
|
||||
|
||||
// If the result is zero or infinity, the mantissa is all zeros.
|
||||
// If the result is NaN, the mantissa is 10...0
|
||||
|
@ -66,7 +66,8 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
wire [1:0] mux_mant;
|
||||
wire sign_rem;
|
||||
wire [63:0] q, qm, qp;
|
||||
wire exp_ovf, exp_ovfSP, exp_ovfDP;
|
||||
wire exp_ovf, exp_ovfSP, exp_ovfDP;
|
||||
logic zero_rem;
|
||||
|
||||
// Remainder = 0?
|
||||
assign zero_rem = ~(|regr_out);
|
||||
@ -97,7 +98,7 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
// 1.) we choose any qm0, qp0, q0 (since we shift mant)
|
||||
// 2.) we choose qp and we overflow (for RU)
|
||||
assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})};
|
||||
assign Texp = exp_diff - {{13{vss}}, ~q1[63]} + {{13{vss}}, mux_mant[1]&qp1[63]&~exp_ovf};
|
||||
assign Texp = exp_diff - {{12{vss}}, ~q1[63]} + {{12{vss}}, mux_mant[1]&qp1[63]&~exp_ovf}; // KEP used to be 13{vss}
|
||||
|
||||
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are
|
||||
// all ones. To encourage sharing with single precision overflow detection,
|
||||
|
@ -12,7 +12,8 @@ module sbtm2 (input logic [11:0] a, output logic [10:0] y);
|
||||
// input to CPA
|
||||
logic [14:0] op1;
|
||||
logic [14:0] op2;
|
||||
logic [14:0] p;
|
||||
logic [14:0] p;
|
||||
logic cout;
|
||||
|
||||
assign x0 = a[11:7];
|
||||
assign x1 = a[6:4];
|
||||
|
@ -25,7 +25,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
|
||||
input logic [4:0] FmaFlagsM; // Round toward minus infinity
|
||||
input logic sumzero; // Sum = O
|
||||
input logic zinfM; // Y = Inf
|
||||
input logic inf; // Some input = Inf
|
||||
input logic inf; // Some input logic = Inf
|
||||
output logic wsign; // Sign of W
|
||||
output logic invz; // Invert addend into adder
|
||||
output logic negsum; // Negate result of adder
|
||||
@ -36,6 +36,9 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
|
||||
wire zerosign; // sign if result= 0
|
||||
wire sumneg; // sign if result= 0
|
||||
wire infsign; // sign if result= Inf
|
||||
logic tmp;
|
||||
logic psign;
|
||||
|
||||
// Compute sign of product
|
||||
|
||||
assign psign = xsign ^ ysign;
|
||||
@ -55,7 +58,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
|
||||
assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);
|
||||
//always @(invz or negsum0 or negsum1 or bsM or ps)
|
||||
// begin
|
||||
// if (~invz) begin // both inputs have same sign
|
||||
// if (~invz) begin // both inputs have same sign
|
||||
// negsum = 0;
|
||||
// selsum1 = 0;
|
||||
// end else if (bsM) begin // sticky bit set on addend
|
||||
@ -80,7 +83,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
|
||||
// Sign calculation is not in the critical path so the cases
|
||||
// can be tolerated.
|
||||
// IEEE 754-2008 section 6.3 states
|
||||
// "When ether an input or result is NaN, this standard does not interpret the sign of a NaN."
|
||||
// "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
|
||||
// also pertaining to negZero it states:
|
||||
// "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
|
||||
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero
|
||||
|
@ -25,7 +25,7 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
|
||||
output logic znanE; // ReadData3E is NaN
|
||||
output logic xdenormE; // ReadData1E is denormalized
|
||||
output logic ydenormE; // ReadData2E is denormalized
|
||||
output logic zdenormE; // ReadData3E is denormalized
|
||||
output logic zdenormE; // ReadData3E is denormalized
|
||||
output logic xinfE; // ReadData1E is infinity
|
||||
output logic yinfE; // ReadData2E is infinity
|
||||
output logic zinfE; // ReadData3E is infinity
|
||||
@ -60,7 +60,7 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
|
||||
// assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE;
|
||||
// assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE;
|
||||
// assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE;
|
||||
// KATHERINE - removed denorm to prevent outputing zero when computing with a denormalized number
|
||||
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
|
||||
assign xzeroE = ~(|ReadData1E[62:0]);
|
||||
assign yzeroE = ~(|ReadData2E[62:0]);
|
||||
assign zzeroE = ~(|ReadData3E[62:0]);
|
||||
|
@ -28,7 +28,7 @@
|
||||
|
||||
module privdec (
|
||||
input logic [31:20] InstrM,
|
||||
input logic PrivilegedM, IllegalIEUInstrFaultM, IllegalCSRAccessM,
|
||||
input logic PrivilegedM, IllegalIEUInstrFaultM, IllegalCSRAccessM, IllegalFPUInstrM,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic STATUS_TSR,
|
||||
output logic IllegalInstrFaultM,
|
||||
@ -47,7 +47,7 @@ module privdec (
|
||||
assign wfiM = PrivilegedM & (InstrM[31:20] == 12'b000100000101);
|
||||
assign sfencevmaM = PrivilegedM & (InstrM[31:25] == 7'b0001001);
|
||||
assign IllegalPrivilegedInstrM = PrivilegedM & ~(uretM|sretM|mretM|ecallM|ebreakM|wfiM|sfencevmaM);
|
||||
assign IllegalInstrFaultM = IllegalIEUInstrFaultM | IllegalPrivilegedInstrM | IllegalCSRAccessM; // *** generalize this for other instructions
|
||||
assign IllegalInstrFaultM = (IllegalIEUInstrFaultM & IllegalFPUInstrM) | IllegalPrivilegedInstrM | IllegalCSRAccessM | IllegalFPUInstrM; // *** generalize this for other instructions
|
||||
|
||||
// *** initially, wfi and sfencevma are nop
|
||||
// *** zfenci extension?
|
||||
|
@ -46,7 +46,7 @@ module privileged (
|
||||
input logic PrivilegedM,
|
||||
input logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM,
|
||||
input logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM,
|
||||
input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD,
|
||||
input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD, IllegalFPUInstrD,
|
||||
input logic LoadMisalignedFaultM,
|
||||
input logic StoreMisalignedFaultM,
|
||||
input logic TimerIntM, ExtIntM, SwIntM,
|
||||
@ -78,6 +78,7 @@ module privileged (
|
||||
logic uretM, sretM, mretM, ecallM, ebreakM, wfiM, sfencevmaM;
|
||||
logic IllegalCSRAccessM;
|
||||
logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
|
||||
logic IllegalFPUInstrE, IllegalFPUInstrM;
|
||||
logic LoadPageFaultM, StorePageFaultM;
|
||||
logic InstrPageFaultF, InstrPageFaultD, InstrPageFaultE, InstrPageFaultM;
|
||||
logic InstrAccessFaultF, InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
|
||||
@ -158,12 +159,12 @@ module privileged (
|
||||
flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD,
|
||||
{InstrPageFaultF, InstrAccessFaultF},
|
||||
{InstrPageFaultD, InstrAccessFaultD});
|
||||
flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE,
|
||||
{IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE});
|
||||
flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM,
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE},
|
||||
{IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM});
|
||||
flopenrc #(4) faultregE(clk, reset, FlushE, ~StallE,
|
||||
{IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD, IllegalFPUInstrD}, // ** vs IllegalInstrFaultInD
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE});
|
||||
flopenrc #(4) faultregM(clk, reset, FlushM, ~StallM,
|
||||
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE},
|
||||
{IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM, IllegalFPUInstrM});
|
||||
|
||||
trap trap(.*);
|
||||
|
||||
|
@ -96,7 +96,7 @@ module wallypipelinedhart (
|
||||
logic SquashSCW;
|
||||
logic [31:0] FSROutW;
|
||||
logic DivSqrtDoneE;
|
||||
logic FInvalInstrD;
|
||||
logic IllegalFPUInstrD;
|
||||
logic [`XLEN-1:0] FPUResultW;
|
||||
|
||||
// memory management unit signals
|
||||
@ -174,7 +174,7 @@ module wallypipelinedhart (
|
||||
privileged priv(.*);
|
||||
|
||||
|
||||
// fpu fpu(.*); // floating point unit
|
||||
fpu fpu(.*); // floating point unit
|
||||
// add FPU here, with SetFflagsM, FRM_REGW
|
||||
// presently stub out SetFlagsM and FloatRegWriteW
|
||||
//assign SetFflagsM = 0;
|
||||
|
@ -416,18 +416,6 @@ module testbench();
|
||||
`CHECK_CSR2(STVAL, `CSRS)
|
||||
`CHECK_CSR(STVEC)
|
||||
|
||||
//$stop;
|
||||
generate
|
||||
if (`BUSYBEAR == 1) begin
|
||||
initial begin //this is temporary until the bug can be fixed!!!
|
||||
#11130100;
|
||||
force dut.hart.ieu.dp.regf.rf[5] = 64'h0000000080000004;
|
||||
#100;
|
||||
release dut.hart.ieu.dp.regf.rf[5];
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
logic speculative;
|
||||
initial begin
|
||||
speculative = 0;
|
||||
|
@ -52,7 +52,71 @@ module testbench();
|
||||
|
||||
string tests64f[] = '{
|
||||
"rv64f/I-FADD-S-01", "2000",
|
||||
"rv64f/I-FCLASS-S-01", "2000"
|
||||
"rv64f/I-FCLASS-S-01", "2000",
|
||||
"rv64f/I-FCVT-S-L-01", "2000",
|
||||
"rv64f/I-FCVT-S-LU-01", "2000",
|
||||
"rv64f/I-FCVT-S-W-01", "2000",
|
||||
"rv64f/I-FCVT-S-WU-01", "2000",
|
||||
"rv64f/I-FCVT-L-S-01", "2000",
|
||||
"rv64f/I-FCVT-LU-S-01", "2000",
|
||||
"rv64f/I-FCVT-W-S-01", "2000",
|
||||
"rv64f/I-FCVT-WU-S-01", "2000",
|
||||
"rv64f/I-FDIV-S-01", "2000",
|
||||
"rv64f/I-FEQ-S-01", "2000",
|
||||
"rv64f/I-FLE-S-01", "2000",
|
||||
"rv64f/I-FLT-S-01", "2000",
|
||||
"rv64f/I-FMADD-S-01", "2000",
|
||||
"rv64f/I-FMAX-S-01", "2000",
|
||||
"rv64f/I-FMIN-S-01", "2000",
|
||||
"rv64f/I-FMSUB-S-01", "2000",
|
||||
"rv64f/I-FMUL-S-01", "2000",
|
||||
"rv64f/I-FMV-W-X-01", "2000",
|
||||
"rv64f/I-FMV-X-W-01", "2000",
|
||||
"rv64f/I-FNMADD-S-01", "2000",
|
||||
"rv64f/I-FNMSUB-S-01", "2000",
|
||||
"rv64f/I-FSGNJ-S-01", "2000",
|
||||
"rv64f/I-FSGNJN-S-01", "2000",
|
||||
"rv64f/I-FSGNJX-S-01", "2000",
|
||||
"rv64f/I-FSQRT-S-01", "2000",
|
||||
"rv64f/I-FSW-01", "2000",
|
||||
"rv64f/I-FLW-01", "2110",
|
||||
"rv64f/I-FSUB-S-01", "2000"
|
||||
};
|
||||
|
||||
|
||||
string tests64d[] = '{
|
||||
"rv64d/I-FADD-D-01", "2000",
|
||||
"rv64d/I-FCLASS-D-01", "2000",
|
||||
"rv64d/I-FCVT-D-L-01", "2000",
|
||||
"rv64d/I-FCVT-D-LU-01", "2000",
|
||||
"rv64d/I-FCVT-D-S-01", "2000",
|
||||
"rv64d/I-FCVT-D-W-01", "2000",
|
||||
"rv64d/I-FCVT-D-WU-01", "2000",
|
||||
"rv64d/I-FCVT-L-D-01", "2000",
|
||||
"rv64d/I-FCVT-LU-D-01", "2000",
|
||||
"rv64d/I-FCVT-S-D-01", "2000",
|
||||
"rv64d/I-FCVT-W-D-01", "2000",
|
||||
"rv64d/I-FCVT-WU-D-01", "2000",
|
||||
"rv64d/I-FDIV-D-01", "2000",
|
||||
"rv64d/I-FEQ-D-01", "2000",
|
||||
"rv64d/I-FLD-D-01", "2420",
|
||||
"rv64d/I-FLE-D-01", "2000",
|
||||
"rv64d/I-FLT-D-01", "2000",
|
||||
"rv64d/I-FMADD-D-01", "2000",
|
||||
"rv64d/I-FMAX-D-01", "2000",
|
||||
"rv64d/I-FMIN-D-01", "2000",
|
||||
"rv64d/I-FMSUB-D-01", "2000",
|
||||
"rv64d/I-FMUL-D-01", "2000",
|
||||
"rv64d/I-FMV-D-X-01", "2000",
|
||||
"rv64d/I-FMV-X-D-01", "2000",
|
||||
"rv64d/I-FNMADD-D-01", "2000",
|
||||
"rv64d/I-FNMSUB-D-01", "2000",
|
||||
"rv64d/I-FSD-01", "2000",
|
||||
"rv64d/I-FSGNJ-D-01", "2000",
|
||||
"rv64d/I-FSGNJN-D-01", "2000",
|
||||
"rv64d/I-FSGNJX-D-01", "2000",
|
||||
"rv64d/I-FSQRTD-01", "2000",
|
||||
"rv64d/I-FSUB-D-01", "2000"
|
||||
};
|
||||
|
||||
string tests64a[] = '{
|
||||
@ -259,6 +323,40 @@ module testbench();
|
||||
"rv32i/I-MISALIGN_JMP-01","2000"
|
||||
};
|
||||
|
||||
string tests32f[] = '{
|
||||
"rv32f/I-FADD-S-01", "2000",
|
||||
"rv32f/I-FCLASS-S-01", "2000",
|
||||
"rv32f/I-FCVT-S-L-01", "2000",
|
||||
"rv32f/I-FCVT-S-LU-01", "2000",
|
||||
"rv32f/I-FCVT-S-W-01", "2000",
|
||||
"rv32f/I-FCVT-S-WU-01", "2000",
|
||||
"rv32f/I-FCVT-L-S-01", "2000",
|
||||
"rv32f/I-FCVT-LU-S-01", "2000",
|
||||
"rv32f/I-FCVT-W-S-01", "2000",
|
||||
"rv32f/I-FCVT-WU-S-01", "2000",
|
||||
"rv32f/I-FDIV-S-01", "2000",
|
||||
"rv32f/I-FEQ-S-01", "2000",
|
||||
"rv32f/I-FLE-S-01", "2000",
|
||||
"rv32f/I-FLT-S-01", "2000",
|
||||
"rv32f/I-FMADD-S-01", "2000",
|
||||
"rv32f/I-FMAX-S-01", "2000",
|
||||
"rv32f/I-FMIN-S-01", "2000",
|
||||
"rv32f/I-FMSUB-S-01", "2000",
|
||||
"rv32f/I-FMUL-S-01", "2000",
|
||||
"rv32f/I-FMV-W-X-01", "2000",
|
||||
"rv32f/I-FMV-X-W-01", "2000",
|
||||
"rv32f/I-FNMADD-S-01", "2000",
|
||||
"rv32f/I-FNMSUB-S-01", "2000",
|
||||
"rv32f/I-FSGNJ-S-01", "2000",
|
||||
"rv32f/I-FSGNJN-S-01", "2000",
|
||||
"rv32f/I-FSGNJX-S-01", "2000",
|
||||
"rv32f/I-FSQRT-S-01", "2000",
|
||||
"rv32f/I-FSW-01", "2000",
|
||||
"rv32f/I-FLW-01", "2110",
|
||||
"rv32f/I-FSUB-S-01", "2000"
|
||||
};
|
||||
|
||||
|
||||
string tests32i[] = {
|
||||
"rv32i/I-ADD-01", "2000",
|
||||
"rv32i/I-ADDI-01","2000",
|
||||
@ -617,11 +715,13 @@ module instrNameDecTB(
|
||||
logic [2:0] funct3;
|
||||
logic [6:0] funct7;
|
||||
logic [11:0] imm;
|
||||
logic [4:0] rs2;
|
||||
|
||||
assign op = instr[6:0];
|
||||
assign funct3 = instr[14:12];
|
||||
assign funct7 = instr[31:25];
|
||||
assign imm = instr[31:20];
|
||||
assign rs2 = instr[24:20];
|
||||
|
||||
// it would be nice to add the operands to the name
|
||||
// create another variable called decoded
|
||||
@ -745,6 +845,67 @@ module instrNameDecTB(
|
||||
else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D";
|
||||
else name = "ILLEGAL";
|
||||
10'b0001111_???: name = "FENCE";
|
||||
10'b1000011_???: name = "FMADD";
|
||||
10'b1000111_???: name = "FMSUB";
|
||||
10'b1001011_???: name = "FNMSUB";
|
||||
10'b1001111_???: name = "FNMADD";
|
||||
// OP-FP opcode (1010011) with funct3 == 000. The operation is selected by
// funct7 (and rs2 for the convert/move forms); funct7[6:2] picks the operation
// class while funct7[1:0] is the format field, so the [6:2] comparisons match
// both single and double precision.
10'b1010011_000: if (funct7[6:2] == 5'b00000) name = "FADD";
|
||||
else if (funct7[6:2] == 5'b00001) name = "FSUB";
|
||||
else if (funct7[6:2] == 5'b00010) name = "FMUL";
|
||||
else if (funct7[6:2] == 5'b00011) name = "FDIV";
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else if (funct7 == 7'b1110000 && rs2 == 5'b00000)  name = "FMV.X.W";
|
||||
else if (funct7 == 7'b1111000 && rs2 == 5'b00000)  name = "FMV.W.X";
|
||||
// Fixed: funct7 1110001 / 1111001 are the double-precision moves, so they are
// named FMV.X.D / FMV.D.X rather than repeating the single-precision mnemonics.
else if (funct7 == 7'b1110001 && rs2 == 5'b00000)  name = "FMV.X.D"; // DOUBLE
|
||||
else if (funct7 == 7'b1111001 && rs2 == 5'b00000)  name = "FMV.D.X"; // DOUBLE
|
||||
else if (funct7[6:2] == 5'b00100) name = "FSGNJ";
|
||||
else if (funct7[6:2] == 5'b00101) name = "FMIN";
|
||||
else if (funct7[6:2] == 5'b10100) name = "FLE";
|
||||
else name = "ILLEGAL";
|
||||
10'b1010011_001: if (funct7[6:2] == 5'b00000) name = "FADD";
|
||||
else if (funct7[6:2] == 5'b00001) name = "FSUB";
|
||||
else if (funct7[6:2] == 5'b00010) name = "FMUL";
|
||||
else if (funct7[6:2] == 5'b00011) name = "FDIV";
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else if (funct7[6:2] == 5'b00100) name = "FSGNJN";
|
||||
else if (funct7[6:2] == 5'b00101) name = "FMAX";
|
||||
else if (funct7[6:2] == 5'b10100) name = "FLT";
|
||||
else if (funct7[6:2] == 5'b11100) name = "FCLASS";
|
||||
else name = "ILLEGAL";
|
||||
// OP-FP opcode (1010011) with funct3 == 010.
// Fixed: the pattern previously used opcode 0101111, which is the AMO opcode,
// so these floating-point names could never be selected for an OP-FP
// instruction. This item belongs with the neighbouring 1010011_000 and
// 1010011_001 items.
10'b1010011_010: if (funct7[6:2] == 5'b00000) name = "FADD";
|
||||
else if (funct7[6:2] == 5'b00001) name = "FSUB";
|
||||
else if (funct7[6:2] == 5'b00010) name = "FMUL";
|
||||
else if (funct7[6:2] == 5'b00011) name = "FDIV";
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else if (funct7[6:2] == 5'b00100) name = "FSGNJX";
|
||||
else if (funct7[6:2] == 5'b10100) name = "FEQ";
|
||||
else name = "ILLEGAL";
|
||||
10'b1010011_???: if (funct7[6:2] == 5'b00000) name = "FADD";
|
||||
else if (funct7[6:2] == 5'b00001) name = "FSUB";
|
||||
else if (funct7[6:2] == 5'b00010) name = "FMUL";
|
||||
else if (funct7[6:2] == 5'b00011) name = "FDIV";
|
||||
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
|
||||
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
|
||||
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
|
||||
else name = "ILLEGAL";
|
||||
// Floating-point loads (opcode 0000111) and stores (opcode 0100111).
// funct3 selects the width: 010 = 32-bit (FLW/FSW), 011 = 64-bit (FLD/FSD).
// Fixed: FLD and FSD previously reused the 010 patterns of FLW/FSW, which made
// them unreachable duplicates; they now match funct3 == 011.
10'b0000111_010: name = "FLW";
|
||||
10'b0100111_010: name = "FSW";
|
||||
10'b0000111_011: name = "FLD";
|
||||
10'b0100111_011: name = "FSD";
|
||||
default: name = "ILLEGAL";
|
||||
endcase
|
||||
endmodule
|
||||
|
Loading…
Reference in New Issue
Block a user