This commit is contained in:
bbracker 2021-05-03 09:23:52 -04:00
commit 2368b58cc9
57 changed files with 1161 additions and 836 deletions

View File

@ -15,16 +15,16 @@ module add(rM, sM, tM, sum,
negsum, invz, selsum1, negsum0, negsum1, killprodM); negsum, invz, selsum1, negsum0, negsum1, killprodM);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
input [105:0] rM; // partial product 1 input logic [105:0] rM; // partial product 1
input [105:0] sM; // partial product 2 input logic [105:0] sM; // partial product 2
input [163:0] tM; // aligned addend input logic [163:0] tM; // aligned addend
input invz; // invert addend input logic invz; // invert addend
input selsum1; // select +1 mode of compound adder input logic selsum1; // select +1 mode of compound adder
input killprodM; // z >> product input logic killprodM; // z >> product
input negsum; // Negate sum input logic negsum; // Negate sum
output [163:0] sum; // sum output logic [163:0] sum; // sum
output negsum0; // sum was negative in +0 mode output logic negsum0; // sum was negative in +0 mode
output negsum1; // sum was negative in +1 mode output logic negsum1; // sum was negative in +1 mode
// Internal nodes // Internal nodes
@ -34,6 +34,7 @@ module add(rM, sM, tM, sum,
wire [164:0] sum0; // sum of compound adder +0 mode wire [164:0] sum0; // sum of compound adder +0 mode
wire [164:0] sum1; // sum of compound adder +1 mode wire [164:0] sum1; // sum of compound adder +1 mode
wire [163:0] prodshifted; // sum of compound adder +1 mode wire [163:0] prodshifted; // sum of compound adder +1 mode
wire [164:0] tmp; // sum of compound adder +1 mode
// Invert addend if z's sign is different from the product's sign // Invert addend if z's sign is different from the product's sign
@ -44,11 +45,13 @@ module add(rM, sM, tM, sum,
assign r2 = killprodM ? 106'b0 : rM; assign r2 = killprodM ? 106'b0 : rM;
assign s2 = killprodM ? 106'b0 : sM; assign s2 = killprodM ? 106'b0 : sM;
//***replace this with a more structural CPA that synthesizes better
// Compound adder // Compound adder
// Consists of 3:2 CSA followed by long compound CPA // Consists of 3:2 CSA followed by long compound CPA
assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0}; //assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0; //assign tmp = ({{57{r2[105]}},r2, 2'b0} + {{57{s2[105]}},s2, 2'b0});
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above assign sum0 = t2 + 164'b0 + {57'b0, r2+s2, 2'b0};
assign sum1 = t2 + 164'b1 + {57'b0, r2+s2, 2'b0}; // +1 from invert of z above
// Check sign bits in +0/1 modes // Check sign bits in +0/1 modes
assign negsum0 = sum0[164]; assign negsum0 = sum0[164];
@ -59,3 +62,4 @@ module add(rM, sM, tM, sum,
assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]); assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]);
endmodule endmodule

View File

@ -15,79 +15,63 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
killprodE, sumshiftE, sumshiftzeroE); killprodE, sumshiftE, sumshiftzeroE);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [51:0] zman; // Fraction of addend z; input logic [51:0] zman; // Fraction of addend z;
input [12:0] aligncntE; // amount to shift input logic [12:0] aligncntE; // amount to shift
input xzeroE; // Input X = 0 input logic xzeroE; // Input X = 0
input yzeroE; // Input Y = 0 input logic yzeroE; // Input Y = 0
input zzeroE; // Input Z = 0 input logic zzeroE; // Input Z = 0
input zdenormE; // Input Z is denormalized input logic zdenormE; // Input Z is denormalized
output [163:0] tE; // aligned addend (54 bits left of bpt) output logic [163:0] tE; // aligned addend (54 bits left of bpt)
output bsE; // sticky bit of addend output logic bsE; // sticky bit of addend
output killprodE; // Z >> product output logic killprodE; // Z >> product
output [7:0] sumshiftE; output logic [8:0] sumshiftE;
output sumshiftzeroE; output logic sumshiftzeroE;
// Internal nodes // Internal nodes
reg [163:0] tE; // aligned addend from shifter
reg [215:0] shift; // aligned addend from shifter reg [215:0] shift; // aligned addend from shifter
reg killprodE; // Z >> product logic [12:0] tmp;
reg bsE; // sticky bit of addend
reg ps; // sticky bit of product
reg zexpsel; // sticky bit of product
reg [7:0] i; // temp storage for finding sticky bit
wire [52:0] z1; // Z plus 1
wire [51:0] z2; // Z selected after handling rounds
wire [11:0] align104; // alignment count + 104
logic [8:0] sumshiftE;
logic sumshiftzeroE;
// Compute sign of aligncntE + 104 to check for shifting too far right always_comb
//assign align104 = aligncntE+104;
// Shift addend by alignment count. Generate sticky bits from
// addend on right shifts. Handle special cases of shifting
// by too much.
always @(aligncntE or xzeroE or yzeroE or zman or zdenormE or zzeroE)
begin begin
// Default to clearing sticky bits // Default to clearing sticky bits
bsE = 0; bsE = 0;
ps = 0;
// And to using product as primary operand in adder I exponent gen // And to using product as primary operand in adder I exponent gen
killprodE = xzeroE | yzeroE; killprodE = xzeroE | yzeroE;
// d = aligncntE // d = aligncntE
// p = 53 // p = 53
if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1 //***try reducing this hardware to use one shifter
if ($signed(aligncntE) <= $signed(-(13'd105))) begin //d<=-2p+1
//product anchored case with saturated shift //product anchored case with saturated shift
sumshiftE = 163; // 3p+4 sumshiftE = 163; // 3p+4
sumshiftzeroE = 0; sumshiftzeroE = 0;
shift = {1'b1,zman,163'b0} >> sumshiftE; shift = {1'b1,zman,163'b0} >> sumshiftE;
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
//zexpsel = 0;
end else if($signed(aligncntE) <= $signed(2)) begin // -2p+1<d<=2 end else if($signed(aligncntE) <= $signed(13'd2)) begin // -2p+1<d<=2
// product anchored or cancellation // product anchored or cancellation
sumshiftE = 57-aligncntE; // p + 2 - d tmp = 13'd57-aligncntE;
sumshiftE = tmp[8:0]; // p + 2 - d
sumshiftzeroE = 0; sumshiftzeroE = 0;
shift = {~zdenormE,zman,163'b0} >> sumshiftE; shift = {~zdenormE,zman,163'b0} >> sumshiftE;
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
//zexpsel = 0;
end else if ($signed(aligncntE)<=$signed(55)) begin // 2 < d <= p+2 end else if ($signed(aligncntE)<=$signed(13'd55)) begin // 2 < d <= p+2
// addend anchored case // addend anchored case
// used to be 56 \/ somthing doesn'tE seem right too many typos // used to be 56 \/ somthing doesn't seem right too many typos
sumshiftE = 57-aligncntE; tmp = 13'd57-aligncntE;
sumshiftE = tmp[8:0];
sumshiftzeroE = 0; sumshiftzeroE = 0;
shift = {~zdenormE,zman, 163'b0} >> sumshiftE; shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
//zexpsel = 1;
end else begin // d >= p+3 end else begin // d >= p+3
// addend anchored case with saturated shift // addend anchored case with saturated shift
sumshiftE = 0; sumshiftE = 0;
@ -96,15 +80,9 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
killprodE = 1; killprodE = 1;
//ps = 1;
//zexpsel = 1;
// use some behavioral code to find sticky bit. This is really
// done by hardware in the shifter.
//if (aligncntE < 0)
// for (i=0; i<-aligncntE-52; i = i+1)
// bsE = bsE || z2[i];
end end
end end
endmodule endmodule

View File

@ -1,21 +1,19 @@
module booth(xExt, choose, add1, e, pp); module booth(xExt, choose, add1, e, pp);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [53:0] xExt; // multiplicand xExt input logic [53:0] xExt; // multiplicand xExt
input [2:0] choose; // bits needed to choose which encoding input logic [2:0] choose; // bits needed to choose which encoding
output [1:0] add1; // do you add 1 output logic [1:0] add1; // do you add 1
output e; output logic e;
output [54:0] pp; // the resultant encoding output logic [54:0] pp; // the resultant encoding
logic [54:0] pp, temp; logic [54:0] temp;
logic e;
logic [1:0] add1;
logic [53:0] negx; logic [53:0] negx;
//logic temp; //logic temp;
assign negx = ~xExt; assign negx = ~xExt;
always @(choose, xExt, negx) always_comb
case (choose) case (choose)
3'b000 : pp = 55'b0; // 0 3'b000 : pp = 55'b0; // 0
3'b001 : pp = {1'b0, xExt}; // 1 3'b001 : pp = {1'b0, xExt}; // 1
@ -24,10 +22,10 @@ module booth(xExt, choose, add1, e, pp);
3'b100 : pp = {negx, 1'b0}; // -2 3'b100 : pp = {negx, 1'b0}; // -2
3'b101 : pp = {1'b1, negx}; // -1 3'b101 : pp = {1'b1, negx}; // -1
3'b110 : pp = {1'b1, negx}; // -1 3'b110 : pp = {1'b1, negx}; // -1
3'b111 : pp = 55'hfffffffffffffff; // -0 3'b111 : pp = '1; // -0
endcase endcase
always @(choose, xExt, negx) always_comb
case (choose) case (choose)
3'b000 : e = 0; // 0 3'b000 : e = 0; // 0
3'b001 : e = 0; // 1 3'b001 : e = 0; // 1
@ -40,7 +38,7 @@ module booth(xExt, choose, add1, e, pp);
endcase endcase
// assign add1 = (choose[2] == 1'b1) ? ((choose[1:0] == 2'b11) ? 1'b0 : 1'b1) : 1'b0; // assign add1 = (choose[2] == 1'b1) ? ((choose[1:0] == 2'b11) ? 1'b0 : 1'b1) : 1'b0;
// assign add1 = choose[2]; // assign add1 = choose[2];
always @(choose) always_comb
case (choose) case (choose)
3'b000 : add1 = 2'b0; // 0 3'b000 : add1 = 2'b0; // 0
3'b001 : add1 = 2'b0; // 1 3'b001 : add1 = 2'b0; // 1

View File

@ -3,11 +3,11 @@ module add3comp2(a, b, c, carry, sum);
//look into different implementations of the compressors? //look into different implementations of the compressors?
parameter BITS = 4; parameter BITS = 4;
input [BITS-1:0] a; input logic [BITS-1:0] a;
input [BITS-1:0] b; input logic [BITS-1:0] b;
input [BITS-1:0] c; input logic [BITS-1:0] c;
output [BITS-1:0] carry; output logic [BITS-1:0] carry;
output [BITS-1:0] sum; output logic [BITS-1:0] sum;
genvar i; genvar i;
generate generate
@ -22,12 +22,12 @@ module add4comp2(a, b, c, d, carry, sum);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
parameter BITS = 4; parameter BITS = 4;
input [BITS-1:0] a; input logic [BITS-1:0] a;
input [BITS-1:0] b; input logic [BITS-1:0] b;
input [BITS-1:0] c; input logic [BITS-1:0] c;
input [BITS-1:0] d; input logic [BITS-1:0] d;
output [BITS:0] carry; output logic [BITS:0] carry;
output [BITS-1:0] sum; output logic [BITS-1:0] sum;
logic [BITS-1:0] cout; logic [BITS-1:0] cout;
logic carryTmp; logic carryTmp;
@ -54,11 +54,11 @@ module sng3comp2(a, b, c, carry, sum);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
//look into different implementations of the compressors? //look into different implementations of the compressors?
input a; input logic a;
input b; input logic b;
input c; input logic c;
output carry; output logic carry;
output sum; output logic sum;
logic axorb; logic axorb;
@ -73,14 +73,14 @@ module sng4comp2(a, b, c, d, cin, cout, carry, sum);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
//look into pass gate 4:2 counters? //look into pass gate 4:2 counters?
input a; input logic a;
input b; input logic b;
input c; input logic c;
input d; input logic d;
input cin; input logic cin;
output cout; output logic cout;
output carry; output logic carry;
output sum; output logic sum;
logic TmpSum; logic TmpSum;

View File

@ -20,17 +20,17 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
aligncntE, prodof, aeE); aligncntE, prodof, aeE);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [62:52] xexp; // Exponent of multiplicand x input logic [62:52] xexp; // Exponent of multiplicand x
input [62:52] yexp; // Exponent of multiplicand y input logic [62:52] yexp; // Exponent of multiplicand y
input [62:52] zexp; // Exponent of addend z input logic [62:52] zexp; // Exponent of addend z
input xdenormE; // X is denorm input logic xdenormE; // X is denorm
input ydenormE; // Y is denorm input logic ydenormE; // Y is denorm
input zdenormE; // Z is denorm input logic zdenormE; // Z is denorm
input xzeroE; // X is zero input logic xzeroE; // X is zero
input yzeroE; // Y is zero input logic yzeroE; // Y is zero
output [12:0] aligncntE; // shift count for alignment shifter output logic [12:0] aligncntE; // shift count for alignment shifter
output prodof; // X*Y exponent out of bounds output logic prodof; // X*Y exponent out of bounds
output [12:0] aeE; //exponent of multiply output logic [12:0] aeE; //exponent of multiply
// Internal nodes // Internal nodes
@ -50,7 +50,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
// if exponent is out of bounds // if exponent is out of bounds
assign aeE = xzeroE|yzeroE ? 0 : xexp + yexp -1023; assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;
assign prodof = (aeE > 2046 && ~aeE[12]); assign prodof = (aeE > 2046 && ~aeE[12]);
@ -61,7 +61,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
// is masked by the bypass mux and two 10 bit adder delays. // is masked by the bypass mux and two 10 bit adder delays.
// assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE; // assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
// assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE}; // assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
assign aligncntE = zexp -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE}; assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
//assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE; //assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
//assign aligncntE = zexp - aeE;// KEP use all of aeE //assign aligncntE = zexp - aeE;// KEP use all of aeE
@ -87,3 +87,4 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
// rounding mode. NaNs are propagated or generated. // rounding mode. NaNs are propagated or generated.
endmodule endmodule

View File

@ -23,24 +23,24 @@ module expgen2(xexp, yexp, zexp,
sumof, sumuf); sumof, sumuf);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [62:52] xexp; // Exponent of multiplicand x input logic [62:52] xexp; // Exponent of multiplicand x
input [62:52] yexp; // Exponent of multiplicand y input logic [62:52] yexp; // Exponent of multiplicand y
input [62:52] zexp; // Exponent of addend z input logic [62:52] zexp; // Exponent of addend z
input sumzero; // sum exactly equals zero input logic sumzero; // sum exactly equals zero
input resultdenorm; // postnormalize rounded result input logic resultdenorm; // postnormalize rounded result
input infinity; // generate infinity on overflow input logic infinity; // generate infinity on overflow
input [4:0] FmaFlagsM; // Result invalid input logic [4:0] FmaFlagsM; // Result invalid
input inf; // Some input is infinity input logic inf; // Some input is infinity
input nanM; // Some input is NaN input logic nanM; // Some input is NaN
input [12:0] de0; // X is NaN NaN input logic [12:0] de0; // X is NaN NaN
input xnanM; // X is NaN input logic xnanM; // X is NaN
input ynanM; // Y is NaN input logic ynanM; // Y is NaN
input znanM; // Z is NaN input logic znanM; // Z is NaN
input expplus1; input logic expplus1;
input specialsel; // Select special result input logic specialsel; // Select special result
output [62:52] wexp; // Exponent of result output logic [62:52] wexp; // Exponent of result
output sumof; // X*Y+Z exponent out of bounds output logic sumof; // X*Y+Z exponent out of bounds
output sumuf; // X*Y+Z exponent underflows output logic sumuf; // X*Y+Z exponent underflows
// Internal nodes // Internal nodes
@ -102,6 +102,7 @@ module expgen2(xexp, yexp, zexp,
// A mux selects the early result from other FPU blocks or the // A mux selects the early result from other FPU blocks or the
// normalized FMAC result. Special cases are also detected. // normalized FMAC result. Special cases are also detected.
assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1; assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1};
endmodule endmodule

View File

@ -11,17 +11,17 @@
module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE); module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input xnanE; // X is NaN input logic xnanE; // X is NaN
input ynanE; // Y is NaN input logic ynanE; // Y is NaN
input znanE; // Z is NaN input logic znanE; // Z is NaN
input prodof; // X*Y overflows exponent input logic prodof; // X*Y overflows exponent
output nanE; // Some source is NaN output logic nanE; // Some source is NaN
// Internal nodes // Internal nodes
output prodinfE; // X*Y larger than max possible output logic prodinfE; // X*Y larger than max possible
// If any input is NaN, propagate the NaN // If any input is NaN, propagate the NaN
assign nanE = xnanE || ynanE || znanE; assign nanE = xnanE || ynanE || znanE;

View File

@ -13,27 +13,27 @@ module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof,
inf, nanM, FmaFlagsM,sticky,prodinfM); inf, nanM, FmaFlagsM,sticky,prodinfM);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input xnanM; // X is NaN input logic xnanM; // X is NaN
input ynanM; // Y is NaN input logic ynanM; // Y is NaN
input znanM; // Z is NaN input logic znanM; // Z is NaN
input xsign; // Sign of x input logic xsign; // Sign of x
input ysign; // Sign of y input logic ysign; // Sign of y
input zsign; // Sign of z input logic zsign; // Sign of z
input sticky; // X is Inf input logic sticky; // X is Inf
input prodinfM; input logic prodinfM;
input xinfM; // X is Inf input logic xinfM; // X is Inf
input yinfM; // Y is Inf input logic yinfM; // Y is Inf
input zinfM; // Z is Inf input logic zinfM; // Z is Inf
input sumof; // X*Y + z overflows exponent input logic sumof; // X*Y + z overflows exponent
input sumuf; // X*Y + z underflows exponent input logic sumuf; // X*Y + z underflows exponent
input xzeroM; // x = 0 input logic xzeroM; // x = 0
input yzeroM; // y = 0 input logic yzeroM; // y = 0
input zzeroM; // z = 0 input logic zzeroM; // z = 0
input killprodM; input logic killprodM;
input [1:0] vbits; // R and S bits of result input logic [1:0] vbits; // R and S bits of result
output inf; // Some source is Inf output logic inf; // Some source is Inf
output nanM; // Some source is NaN input logic nanM; // Some source is NaN
output [4:0] FmaFlagsM; output logic [4:0] FmaFlagsM;
// Internal nodes // Internal nodes
@ -55,8 +55,8 @@ logic suminf;
assign FmaFlagsM[2] = suminf && ~inf; assign FmaFlagsM[2] = suminf && ~inf;
// Set the underflow flag for the following cases: // Set the underflow flag for the following cases:
// 1) Any input is denormalized // 1) Any input is denormalized
// 2) Output would be denormalized or smaller // 2) Output would be denormalized or smaller
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM)); assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));
@ -70,7 +70,7 @@ logic suminf;
// Set invalid flag for following cases: // Set invalid flag for following cases:
// 1) Inf - Inf // 1) Inf - Inf
// 2) 0 * Inf // 2) 0 * Inf
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj) // 3) Output = NaN (this is not part of the IEEE spec, only 486 proj)
assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) || assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above

View File

@ -35,37 +35,37 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
xinfE, yinfE, zinfE, nanE, prodinfE); xinfE, yinfE, zinfE, nanE, prodinfE);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [63:0] ReadData1E; // input 1 input logic [63:0] ReadData1E; // input 1
input [63:0] ReadData2E; // input 2 input logic [63:0] ReadData2E; // input 2
input [63:0] ReadData3E; // input 3 input logic [63:0] ReadData3E; // input 3
input [2:0] FrmE; // Rounding mode input logic [2:0] FrmE; // Rounding mode
output [12:0] aligncntE; // status flags output logic [12:0] aligncntE; // status flags
output [105:0] rE; // one result of partial product sum output logic [105:0] rE; // one result of partial product sum
output [105:0] sE; // other result of partial products output logic [105:0] sE; // other result of partial products
output [163:0] tE; // output of alignment shifter output logic [163:0] tE; // output of alignment shifter
output [12:0] aeE; // multiplier exponent output logic [12:0] aeE; // multiplier exponent
output bsE; // sticky bit of addend output logic bsE; // sticky bit of addend
output killprodE; // ReadData3E >> product output logic killprodE; // ReadData3E >> product
output xzeroE; output logic xzeroE;
output yzeroE; output logic yzeroE;
output zzeroE; output logic zzeroE;
output xdenormE; output logic xdenormE;
output ydenormE; output logic ydenormE;
output zdenormE; output logic zdenormE;
output xinfE; output logic xinfE;
output yinfE; output logic yinfE;
output zinfE; output logic zinfE;
output xnanE; output logic xnanE;
output ynanE; output logic ynanE;
output znanE; output logic znanE;
output nanE; output logic nanE;
output prodinfE; output logic prodinfE;
output [8:0] sumshiftE; output logic [8:0] sumshiftE;
output sumshiftzeroE; output logic sumshiftzeroE;
// Internal nodes // Internal nodes
// output [12:0] aligncntE; // shift count for alignment // output logic [12:0] aligncntE; // shift count for alignment
logic prodof; // ReadData1E*ReadData2E out of range logic prodof; // ReadData1E*ReadData2E out of range
@ -95,7 +95,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
special special(.*); special special(.*);
// Instantiate control output // Instantiate control output
flag1 flag1(.*); flag1 flag1(.*);

View File

@ -15,13 +15,13 @@
// normalize Normalization shifter // normalize Normalization shifter
// round Rounding of result // round Rounding of result
// exception Handles exceptional cases // exception Handles exceptional cases
// bypass Handles bypass of result to ReadData1M or ReadData3M inputs // bypass Handles bypass of result to ReadData1M or ReadData3M inputs
// sign One bit sign handling block // sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.) // special Catch special cases (inputs = 0 / infinity / etc.)
// //
// The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by // The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to // RN, RZ, RM, or RP. The result is optionally bypassed back to
// the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals // the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates // are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software; // an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags. // the other three signals are IEEE flags.
@ -39,38 +39,38 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
); );
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [63:0] ReadData1M; // input 1 input logic [63:0] ReadData1M; // input 1
input [63:0] ReadData2M; // input 2 input logic [63:0] ReadData2M; // input 2
input [63:0] ReadData3M; // input 3 input logic [63:0] ReadData3M; // input 3
input [2:0] FrmM; // Rounding mode input logic [2:0] FrmM; // Rounding mode
input [12:0] aligncntM; // status flags input logic [12:0] aligncntM; // status flags
input [105:0] rM; // one result of partial product sum input logic [105:0] rM; // one result of partial product sum
input [105:0] sM; // other result of partial products input logic [105:0] sM; // other result of partial products
input [163:0] tM; // output of alignment shifter input logic [163:0] tM; // output of alignment shifter
input [8:0] normcntM; // shift count for normalizer input logic [8:0] normcntM; // shift count for normalizer
input [12:0] aeM; // multiplier exponent input logic [12:0] aeM; // multiplier exponent
input bsM; // sticky bit of addend input logic bsM; // sticky bit of addend
input killprodM; // ReadData3M >> product input logic killprodM; // ReadData3M >> product
input prodinfM; input logic prodinfM;
input xzeroM; input logic xzeroM;
input yzeroM; input logic yzeroM;
input zzeroM; input logic zzeroM;
input xdenormM; input logic xdenormM;
input ydenormM; input logic ydenormM;
input zdenormM; input logic zdenormM;
input xinfM; input logic xinfM;
input yinfM; input logic yinfM;
input zinfM; input logic zinfM;
input xnanM; input logic xnanM;
input ynanM; input logic ynanM;
input znanM; input logic znanM;
input nanM; input logic nanM;
input [8:0] sumshiftM; input logic [8:0] sumshiftM;
input sumshiftzeroM; input logic sumshiftzeroM;
input [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
output [4:0] FmaFlagsM; // status flags output logic [4:0] FmaFlagsM; // status flags
// Internal nodes // Internal nodes

View File

@ -12,14 +12,13 @@
module lza(sum, normcnt, sumzero); module lza(sum, normcnt, sumzero);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [163:0] sum; // sum input logic [163:0] sum; // sum
output [8:0] normcnt; // normalization shift count output logic [8:0] normcnt; // normalization shift count
output sumzero; // sum = 0 output logic sumzero; // sum = 0
// Internal nodes // Internal nodes
reg [8:0] i; // loop index reg [8:0] i; // loop index
reg [8:0] normcnt; // normalization shift count
// A real LOP uses a fast carry chain to find only the first 0. // A real LOP uses a fast carry chain to find only the first 0.
// It is an example of a parallel prefix algorithm. For the sake // It is an example of a parallel prefix algorithm. For the sake
@ -27,7 +26,7 @@ module lza(sum, normcnt, sumzero);
// A real LOP would also operate on the sources of the adder, not // A real LOP would also operate on the sources of the adder, not
// the result! // the result!
always @ ( sum) always_comb
begin begin
i = 0; i = 0;
while (~sum[163-i] && i <= 163) i = i+1; // search for leading one while (~sum[163-i] && i <= 163) i = i+1; // search for leading one

View File

@ -2,31 +2,32 @@
module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE); module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [51:0] xman; // Fraction of multiplicand x input logic [51:0] xman; // Fraction of multiplicand x
input [51:0] yman; // Fraction of multiplicand y input logic [51:0] yman; // Fraction of multiplicand y
input xdenormE; // is x denormalized input logic xdenormE; // is x denormalized
input ydenormE; // is y denormalized input logic ydenormE; // is y denormalized
input xzeroE; // x is zero input logic xzeroE; // x is zero
input yzeroE; // y is zero input logic yzeroE; // y is zero
output [105:0] rE; // partial product 1 output logic [105:0] rE; // partial product 1
output [105:0] sE; // partial product 2 output logic [105:0] sE; // partial product 2
wire [54:0] yExt; //y with appended 0 and assumed 1 wire [54:0] yExt; //y with appended 0 and assumed 1
wire [53:0] xExt; //x with assumed 1 wire [53:0] xExt; //x with assumed 1
wire [26:0][1:0] add1; wire [26:0][1:0] add1;
wire [26:0][54:0] pp; wire [26:0][54:0] pp;
wire [26:0] e; wire [26:0] e;
logic [17:0][105:0] lv1add; logic [106:0] tmpsE;
logic [11:0][105:0] lv2add; logic [17:0][106:0] lv1add;
logic [7:0][105:0] lv3add; logic [11:0][106:0] lv2add;
logic [3:0][105:0] lv4add; logic [7:0][106:0] lv3add;
logic [21:0][106:0] carryTmp; logic [3:0][106:0] lv4add;
wire [26:0][105:0] acc; logic [21:0][107:0] carryTmp;
wire [26:0][106:0] acc;
// wire [105:0] acc // wire [105:0] acc
genvar i; genvar i;
assign xExt = {2'b0,~(xdenormE|xzeroE),xman}; assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
assign yExt = {2'b0,~(ydenormE|yzeroE),yman, 1'b0}; assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
generate generate
for(i=0; i<27; i=i+1) begin for(i=0; i<27; i=i+1) begin
@ -35,69 +36,70 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
endgenerate endgenerate
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]}; assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0}; assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0}; assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0}; assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0}; assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0}; assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0}; assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0}; assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0}; assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0}; assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0}; assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0}; assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0}; assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0}; assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0}; assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0}; assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0}; assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0}; assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0}; assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0}; assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0}; assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0}; assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0}; assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0}; assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0}; assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
assign acc[26] = {pp[26],add1[25], 50'b0}; assign acc[26] = {pp[26],add1[25], 50'b0};
//*** resize adders //*** resize adders
generate generate
for(i=0; i<9; i=i+1) begin for(i=0; i<9; i=i+1) begin
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1])); .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0}; assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
end end
endgenerate endgenerate
generate generate
for(i=0; i<6; i=i+1) begin for(i=0; i<6; i=i+1) begin
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1])); .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0}; assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
end end
endgenerate endgenerate
generate generate
for(i=0; i<4; i=i+1) begin for(i=0; i<4; i=i+1) begin
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1])); .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0}; assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
end end
endgenerate endgenerate
generate generate
for(i=0; i<2; i=i+1) begin for(i=0; i<2; i=i+1) begin
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]), add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1])); .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0}; assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
end end
endgenerate endgenerate
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) , add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
.carry(carryTmp[21]), .sum(sE)); .carry(carryTmp[21]), .sum(tmpsE));
assign sE = tmpsE[105:0];
assign rE = {carryTmp[21][104:0], 1'b0}; assign rE = {carryTmp[21][104:0], 1'b0};
// assign rE = 0; // assign rE = 0;
// assign sE = acc[0] + // assign sE = acc[0] +
@ -131,3 +133,4 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
// assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman}; // assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
// assign rE = 0; // assign rE = 0;
endmodule endmodule

View File

@ -17,35 +17,31 @@
module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero, module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero,
xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v); xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [163:0] sum; // sum input logic [163:0] sum; // sum
input [62:52] zexp; // sum input logic [62:52] zexp; // sum
input [8:0] normcnt; // normalization shift count input logic [8:0] normcnt; // normalization shift count
input [12:0] aeM; // normalization shift count input logic [12:0] aeM; // normalization shift count
input [12:0] aligncntM; // normalization shift count input logic [12:0] aligncntM; // normalization shift count
input [8:0] sumshiftM; // normalization shift count input logic [8:0] sumshiftM; // normalization shift count
input sumshiftzeroM; input logic sumshiftzeroM;
input sumzero; // sum is zero input logic sumzero; // sum is zero
input bsM; // sticky bit for addend input logic bsM; // sticky bit for addend
input xdenormM; // Input Z is denormalized input logic xdenormM; // Input Z is denormalized
input ydenormM; // Input Z is denormalized input logic ydenormM; // Input Z is denormalized
input zdenormM; // Input Z is denormalized input logic zdenormM; // Input Z is denormalized
input xzeroM; input logic xzeroM;
input yzeroM; input logic yzeroM;
input zzeroM; input logic zzeroM;
output sticky; //sticky bit output logic sticky; //sticky bit
output [12:0] de0; output logic [12:0] de0;
output resultdenorm; // Input Z is denormalized output logic resultdenorm; // Input Z is denormalized
output [53:0] v; // normalized sum, R, S bits output logic [53:0] v; // normalized sum, R, S bits
// Internal nodes // Internal nodes
reg [53:0] v; // normalized sum, R, S bits logic [163:0] sumshifted; // shifted sum
logic resultdenorm; // Input Z is denormalized
logic [12:0] de0;
logic [163:0] sumshifted; // shifted sum
logic [9:0] sumshifttmp; logic [9:0] sumshifttmp;
logic [163:0] sumshiftedtmp; // shifted sum logic [163:0] sumshiftedtmp; // shifted sum
logic sticky;
logic isShiftLeft1; logic isShiftLeft1;
logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5; logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
@ -60,28 +56,28 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// The sticky bit calculation is actually built into the shifter and // The sticky bit calculation is actually built into the shifter and
// does not require a true subtraction shown in the model. // does not require a true subtraction shown in the model.
assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2; assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022)); // assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
always @(sum or sumshiftM or aeM or aligncntM or normcnt or bsM or isShiftLeft1 or zexp or zdenormM) always_comb
begin begin
// d = aligncntM // d = aligncntM
// l = normcnt // l = normcnt
// p = 53 // p = 53
// ea + eb = aeM // ea + eb = aeM
// set d<=2 to d<=0 // set d<=2 to d<=0
if ($signed(aligncntM)<=$signed(2)) begin //d<=2 if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
// product anchored or cancellation // product anchored or cancellation
if ($signed(aeM-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
//normal result //normal result
de0 = xzeroM|yzeroM ? zexp : aeM-normcnt+xdenormM+ydenormM+57; de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
resultdenorm = |sum & ~|de0 | de0[12]; resultdenorm = |sum & ~|de0 | de0[12];
// if z is zero then there was a 56 bit shift of the product // if z is zero then there was a 56 bit shift of the product
sumshifted = resultdenorm ? sum << sumshiftM-zzeroM+isShiftLeft1 : sum << normcnt; // p+2+l sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
v = sumshifted[162:109]; v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bsM; sticky = (|sumshifted[108:0]) | bsM;
//de0 = aeM-normcnt+2-1023; //de0 = aeM-normcnt+2-1023;
end else begin end else begin
sumshifted = sum << (1080+aeM); sumshifted = sum << (13'd1080+aeM);
v = sumshifted[162:109]; v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bsM; sticky = (|sumshifted[108:0]) | bsM;
resultdenorm = 1; resultdenorm = 1;
@ -100,29 +96,29 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// the book says exp = zexp + {-1,0,1} // the book says exp = zexp + {-1,0,1}
if(sumshiftzeroM) begin if(sumshiftzeroM) begin
v = sum[162:109]; v = sum[162:109];
sticky = sum[108:0] | bsM; sticky = (|sum[108:0]) | bsM;
de0 = zexp; de0 = {2'b0,zexp};
end else if(sumshifted[163] & ~sumshifttmp[9])begin end else if(sumshifted[163] & ~sumshifttmp[9])begin
v = sumshifted[162:109]; v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bsM; sticky = (|sumshifted[108:0]) | bsM;
de0 = zexp +2; de0 = {2'b0,zexp} +13'd2;
end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
v = sumshifted[161:108]; v = sumshifted[161:108];
sticky = (|sumshifted[107:0]) | bsM; sticky = (|sumshifted[107:0]) | bsM;
de0 = zexp+1; de0 = {2'b0,zexp}+13'd1;
end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
v = sumshifted[160:107]; v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bsM; sticky = (|sumshifted[106:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
de0 = zexp+zdenormM; de0 = {2'b0,zexp}+{12'b0,zdenormM};
end else if(sumshifted[160]& ~zdenormM) begin end else if(sumshifted[160]& ~zdenormM) begin
de0 = zexp-1; de0 = {2'b0,zexp}-13'b1;
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106]; v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
sticky = (|sumshifted[105:0]) | bsM; sticky = (|sumshifted[105:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
end else if(sumshifted[159]& ~zdenormM) begin end else if(sumshifted[159]& ~zdenormM) begin
//v = sumshifted[158:105]; //v = sumshifted[158:105];
de0 = zexp-2; de0 = {2'b0,zexp}-13'd2;
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105]; v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
sticky = (|sumshifted[104:0]) | bsM; sticky = (|sumshifted[104:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
@ -130,7 +126,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
v = sumshifted[160:107]; v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bsM; sticky = (|sumshifted[106:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
de0 = zexp; de0 = {{2{zexp[62]}},zexp};
end else begin end else begin
de0 = 0; de0 = 0;
sumshifted = sum << sumshiftM-1; // p+2+l sumshifted = sum << sumshiftM-1; // p+2+l
@ -148,3 +144,4 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
endmodule endmodule

View File

@ -4,7 +4,7 @@
// Date: 11/2/1995 // Date: 11/2/1995
// //
// Block Description: // Block Description:
// This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay. // This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay.
// //
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also // The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
// muxed in to form the actual result for register file writeback. This // muxed in to form the actual result for register file writeback. This
@ -19,23 +19,23 @@ module round(v, sticky, FrmM, wsign,
wman, infinity, specialsel,expplus1); wman, infinity, specialsel,expplus1);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [53:0] v; // normalized sum, R, S bits input logic [53:0] v; // normalized sum, R, S bits
input sticky; //sticky bit input logic sticky; //sticky bit
input [2:0] FrmM; input logic [2:0] FrmM;
input wsign; // Sign of result input logic wsign; // Sign of result
input [4:0] FmaFlagsM; input logic [4:0] FmaFlagsM;
input inf; // Some input is infinity input logic inf; // Some input logic is infinity
input nanM; // Some input is NaN input logic nanM; // Some input logic is NaN
input xnanM; // X is NaN input logic xnanM; // X is NaN
input ynanM; // Y is NaN input logic ynanM; // Y is NaN
input znanM; // Z is NaN input logic znanM; // Z is NaN
input [51:0] xman; // Input X input logic [51:0] xman; // input logic X
input [51:0] yman; // Input Y input logic [51:0] yman; // input logic Y
input [51:0] zman; // Input Z input logic [51:0] zman; // input logic Z
output [51:0] wman; // rounded result of FMAC output logic [51:0] wman; // rounded result of FMAC
output infinity; // Generate infinity on overflow output logic infinity; // Generate infinity on overflow
output specialsel; // Select special result output logic specialsel; // Select special result
output expplus1; output logic expplus1;
// Internal nodes // Internal nodes
@ -56,7 +56,7 @@ module round(v, sticky, FrmM, wsign,
// 0xx - do nothing // 0xx - do nothing
// 100 - tie - plus1 if v[2] = 1 // 100 - tie - plus1 if v[2] = 1
// 101/110/111 - plus1 // 101/110/111 - plus1
always @ (FrmM, v, wsign, sticky) begin always_comb begin
case (FrmM) case (FrmM)
3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even 3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to nearest even
3'b001: plus1 = 0;//round to zero 3'b001: plus1 = 0;//round to zero
@ -85,7 +85,7 @@ module round(v, sticky, FrmM, wsign,
// The special result mux is a 4:1 mux that should not appear in the // The special result mux is a 4:1 mux that should not appear in the
// critical path of the machine. It is not priority encoded, despite // critical path of the machine. It is not priority encoded, despite
// the code below suggesting otherwise. Also, several of the identical data // the code below suggesting otherwise. Also, several of the identical data
// inputs to the wide muxes can be combined at the expense of more // inputs to the wide muxes can be combined at the expense of more
// complicated non-critical control in the circuit implementation. // complicated non-critical control in the circuit implementation.
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
@ -102,15 +102,15 @@ module round(v, sticky, FrmM, wsign,
assign infinityres = infinity ? 52'b0 : {52{1'b1}}; assign infinityres = infinity ? 52'b0 : {52{1'b1}};
// Invalid operations produce a quiet NaN. The result should // Invalid operations produce a quiet NaN. The result should
// propagate an input if the input is NaN. Since we assume all // propagate an input if the input is NaN. Since we assume all
// NaN inputs are already quiet, we don't have to force them quiet. // NaN inputs are already quiet, we don't have to force them quiet.
// assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original // assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original
// IEEE 754-2008 section 6.2.3 states: // IEEE 754-2008 section 6.2.3 states:
// "If two or more inputs are NaN, then the payload of the resulting NaN should be // "If two or more inputs are NaN, then the payload of the resulting NaN should be
// identical to the payload of one of the input NaNs if representable in the destination // identical to the payload of one of the input NaNs if representable in the destination
// format. This standard does not specify which of the input NaNs will provide the payload." // format. This standard does not specify which of the input NaNs will provide the payload."
assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
// Select result with 4:1 mux // Select result with 4:1 mux

View File

@ -14,30 +14,28 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd); sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd);
////////////////////////////////////////////////////////////////////////////I ////////////////////////////////////////////////////////////////////////////I
input xsign; // Sign of X input logic xsign; // Sign of X
input ysign; // Sign of Y input logic ysign; // Sign of Y
input zsign; // Sign of Z input logic zsign; // Sign of Z
input isAdd; input logic isAdd;
input negsum0; // Sum in +O mode is negative input logic negsum0; // Sum in +O mode is negative
input negsum1; // Sum in +1 mode is negative input logic negsum1; // Sum in +1 mode is negative
input bsM; // sticky bit from addend input logic bsM; // sticky bit from addend
input [2:0] FrmM; // Round toward minus infinity input logic [2:0] FrmM; // Round toward minus infinity
input [4:0] FmaFlagsM; // Round toward minus infinity input logic [4:0] FmaFlagsM; // Round toward minus infinity
input sumzero; // Sum = O input logic sumzero; // Sum = O
input zinfM; // Y = Inf input logic zinfM; // Y = Inf
input inf; // Some input = Inf input logic inf; // Some input logic = Inf
output wsign; // Sign of W output logic wsign; // Sign of W
output invz; // Invert addend into adder output logic invz; // Invert addend into adder
output negsum; // Negate result of adder output logic negsum; // Negate result of adder
output selsum1; // Select +1 mode from compound adder output logic selsum1; // Select +1 mode from compound adder
// Internal nodes // Internal nodes
wire zerosign; // sign if result= 0 wire zerosign; // sign if result= 0
wire sumneg; // sign if result= 0 wire sumneg; // sign if result= 0
wire infsign; // sign if result= Inf wire infsign; // sign if result= Inf
reg negsum; // negate result of adder
reg selsum1; // select +1 mode from compound adder
logic tmp; logic tmp;
// Compute sign of product // Compute sign of product
@ -59,7 +57,7 @@ logic tmp;
assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign); assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);
//always @(invz or negsum0 or negsum1 or bsM or ps) //always @(invz or negsum0 or negsum1 or bsM or ps)
// begin // begin
// if (~invz) begin // both inputs have same sign // if (~invz) begin // both input logics have same sign
// negsum = 0; // negsum = 0;
// selsum1 = 0; // selsum1 = 0;
// end else if (bsM) begin // sticky bit set on addend // end else if (bsM) begin // sticky bit set on addend
@ -84,7 +82,7 @@ logic tmp;
// Sign calculation is not in the critical path so the cases // Sign calculation is not in the critical path so the cases
// can be tolerated. // can be tolerated.
// IEEE 754-2008 section 6.3 states // IEEE 754-2008 section 6.3 states
// "When either an input or result is NaN, this standard does not interpret the sign of a NaN." // "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
// also pertaining to negZero it states: // also pertaining to negZero it states:
// "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference // "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero // shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero

View File

@ -14,21 +14,21 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE); xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input [63:0] ReadData1E; // Input ReadData1E input logic [63:0] ReadData1E; // Input ReadData1E
input [63:0] ReadData2E; // Input ReadData2E input logic [63:0] ReadData2E; // Input ReadData2E
input [63:0] ReadData3E; // Input ReadData3E input logic [63:0] ReadData3E; // Input ReadData3E
output xzeroE; // Input ReadData1E = 0 output logic xzeroE; // Input ReadData1E = 0
output yzeroE; // Input ReadData2E = 0 output logic yzeroE; // Input ReadData2E = 0
output zzeroE; // Input ReadData3E = 0 output logic zzeroE; // Input ReadData3E = 0
output xnanE; // ReadData1E is NaN output logic xnanE; // ReadData1E is NaN
output ynanE; // ReadData2E is NaN output logic ynanE; // ReadData2E is NaN
output znanE; // ReadData3E is NaN output logic znanE; // ReadData3E is NaN
output xdenormE; // ReadData1E is denormalized output logic xdenormE; // ReadData1E is denormalized
output ydenormE; // ReadData2E is denormalized output logic ydenormE; // ReadData2E is denormalized
output zdenormE; // ReadData3E is denormalized output logic zdenormE; // ReadData3E is denormalized
output xinfE; // ReadData1E is infinity output logic xinfE; // ReadData1E is infinity
output yinfE; // ReadData2E is infinity output logic yinfE; // ReadData2E is infinity
output zinfE; // ReadData3E is infinity output logic zinfE; // ReadData3E is infinity
// In the actual circuit design, the gates looking at bits // In the actual circuit design, the gates looking at bits
// 51:0 and at bits 62:52 should be shared among the various detectors. // 51:0 and at bits 62:52 should be shared among the various detectors.
@ -60,7 +60,7 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
// assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE; // assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE;
// assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE; // assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE;
// assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE; // assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE;
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number // KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
assign xzeroE = ~(|ReadData1E[62:0]); assign xzeroE = ~(|ReadData1E[62:0]);
assign yzeroE = ~(|ReadData2E[62:0]); assign yzeroE = ~(|ReadData2E[62:0]);
assign zzeroE = ~(|ReadData3E[62:0]); assign zzeroE = ~(|ReadData3E[62:0]);

View File

@ -1 +1 @@
testfloat_gen f64_mulAdd -n 6133248 -rminMag -seed 113355 -level 1 >> testFloat testfloat_gen f64_mulAdd -n 6133248 -rnear_even -seed 113355 -level 1 >> testFloat

View File

@ -34,6 +34,7 @@ module add(rM, sM, tM, sum,
wire [164:0] sum0; // sum of compound adder +0 mode wire [164:0] sum0; // sum of compound adder +0 mode
wire [164:0] sum1; // sum of compound adder +1 mode wire [164:0] sum1; // sum of compound adder +1 mode
wire [163:0] prodshifted; // sum of compound adder +1 mode wire [163:0] prodshifted; // sum of compound adder +1 mode
wire [164:0] tmp; // sum of compound adder +1 mode
// Invert addend if z's sign is different from the product's sign // Invert addend if z's sign is different from the product's sign
@ -44,12 +45,13 @@ module add(rM, sM, tM, sum,
assign r2 = killprodM ? 106'b0 : rM; assign r2 = killprodM ? 106'b0 : rM;
assign s2 = killprodM ? 106'b0 : sM; assign s2 = killprodM ? 106'b0 : sM;
//replace this with a more structural cpa that synthesizes better //***replace this with a more structural cpa that synthesizes better
// Compound adder // Compound adder
// Consists of 3:2 CSA followed by long compound CPA // Consists of 3:2 CSA followed by long compound CPA
// assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0}; //assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0};
assign sum0 = {1'b0,prodshifted} + t2 + 158'b0 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0}; //assign tmp = ({{57{r2[105]}},r2, 2'b0} + {{57{s2[105]}},s2, 2'b0});
assign sum1 = {1'b0,prodshifted} + t2 + 158'b1 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0}; // +1 from invert of z above assign sum0 = t2 + 164'b0 + {57'b0, r2+s2, 2'b0};
assign sum1 = t2 + 164'b1 + {57'b0, r2+s2, 2'b0}; // +1 from invert of z above
// Check sign bits in +0/1 modes // Check sign bits in +0/1 modes
assign negsum0 = sum0[164]; assign negsum0 = sum0[164];
@ -60,3 +62,4 @@ module add(rM, sM, tM, sum,
assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]); assign sum = selsum1 ? (negsum ? -sum1[163:0] : sum1[163:0]) : (negsum ? -sum0[163:0] : sum0[163:0]);
endmodule endmodule

View File

@ -88,15 +88,15 @@ module BLOCK2A ( PIN2, GIN1, GIN2, GOUT );
assign GOUT = ~ (GIN2 | (PIN2 & GIN1)); assign GOUT = ~ (GIN2 | (PIN2 & GIN1));
endmodule endmodule
//***KEP all [0:63], [0:64], etc. ranges changed to [63:0], [64:0], etc. due to lint warning
module PRESTAGE_64 ( A, B, CIN, POUT, GOUT ); module PRESTAGE_64 ( A, B, CIN, POUT, GOUT );
input [0:63] A; input [63:0] A;
input [0:63] B; input [63:0] B;
input CIN; input CIN;
output [0:63] POUT; output [63:0] POUT;
output [0:64] GOUT; output [64:0] GOUT;
BLOCK0 U10 (A[0] , B[0] , POUT[0] , GOUT[1] ); BLOCK0 U10 (A[0] , B[0] , POUT[0] , GOUT[1] );
BLOCK0 U11 (A[1] , B[1] , POUT[1] , GOUT[2] ); BLOCK0 U11 (A[1] , B[1] , POUT[1] , GOUT[2] );
@ -169,11 +169,11 @@ endmodule // PRESTAGE_64
module DBLC_0_64 ( PIN, GIN, POUT, GOUT ); module DBLC_0_64 ( PIN, GIN, POUT, GOUT );
input [0:63] PIN; input [63:0] PIN;
input [0:64] GIN; input [64:0] GIN;
output [0:62] POUT; output [62:0] POUT;
output [0:64] GOUT; output [64:0] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] ); INVBLOCK U10 (GIN[0] , GOUT[0] );
BLOCK1A U21 (PIN[0] , GIN[0] , GIN[1] , GOUT[1] ); BLOCK1A U21 (PIN[0] , GIN[0] , GIN[1] , GOUT[1] );
@ -246,11 +246,11 @@ endmodule // DBLC_0_64
module DBLC_1_64 ( PIN, GIN, POUT, GOUT ); module DBLC_1_64 ( PIN, GIN, POUT, GOUT );
input [0:62] PIN; input [62:0] PIN;
input [0:64] GIN; input [64:0] GIN;
output [0:60] POUT; output [60:0] POUT;
output [0:64] GOUT; output [64:0] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] ); INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] ); INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -323,11 +323,11 @@ endmodule // DBLC_1_64
module DBLC_2_64 ( PIN, GIN, POUT, GOUT ); module DBLC_2_64 ( PIN, GIN, POUT, GOUT );
input [0:60] PIN; input [60:0] PIN;
input [0:64] GIN; input [64:0] GIN;
output [0:56] POUT; output [56:0] POUT;
output [0:64] GOUT; output [64:0] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] ); INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] ); INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -400,11 +400,11 @@ endmodule // DBLC_2_64
module DBLC_3_64 ( PIN, GIN, POUT, GOUT ); module DBLC_3_64 ( PIN, GIN, POUT, GOUT );
input [0:56] PIN; input [56:0] PIN;
input [0:64] GIN; input [64:0] GIN;
output [0:48] POUT; output [48:0] POUT;
output [0:64] GOUT; output [64:0] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] ); INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] ); INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -477,11 +477,11 @@ endmodule // DBLC_3_64
module DBLC_4_64 ( PIN, GIN, POUT, GOUT ); module DBLC_4_64 ( PIN, GIN, POUT, GOUT );
input [0:48] PIN; input [48:0] PIN;
input [0:64] GIN; input [64:0] GIN;
output [0:32] POUT; output [32:0] POUT;
output [0:64] GOUT; output [64:0] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] ); INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] ); INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -554,11 +554,11 @@ endmodule // DBLC_4_64
module DBLC_5_64 ( PIN, GIN, POUT, GOUT ); module DBLC_5_64 ( PIN, GIN, POUT, GOUT );
input [0:32] PIN; input [32:0] PIN;
input [0:64] GIN; input [64:0] GIN;
output [0:0] POUT; output [0:0] POUT;
output [0:64] GOUT; output [64:0] GOUT;
INVBLOCK U10 (GIN[0] , GOUT[0] ); INVBLOCK U10 (GIN[0] , GOUT[0] );
INVBLOCK U11 (GIN[1] , GOUT[1] ); INVBLOCK U11 (GIN[1] , GOUT[1] );
@ -631,12 +631,12 @@ endmodule // DBLC_5_64
module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT ); module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT );
input [0:63] A; input [63:0] A;
input [0:63] B; input [63:0] B;
input PBIT; input PBIT;
input [0:64] CARRY; input [64:0] CARRY;
output [0:63] SUM; output [63:0] SUM;
output COUT; output COUT;
XXOR1 U20 (A[0] , B[0] , CARRY[0] , SUM[0] ); XXOR1 U20 (A[0] , B[0] , CARRY[0] , SUM[0] );
@ -710,22 +710,22 @@ endmodule // XORSTAGE_64
module DBLCTREE_64 ( PIN, GIN, GOUT, POUT ); module DBLCTREE_64 ( PIN, GIN, GOUT, POUT );
input [0:63] PIN; input [63:0] PIN;
input [0:64] GIN; input [64:0] GIN;
output [0:64] GOUT; output [64:0] GOUT;
output [0:0] POUT; output [0:0] POUT;
wire [0:62] INTPROP_0; wire [62:0] INTPROP_0;
wire [0:64] INTGEN_0; wire [64:0] INTGEN_0;
wire [0:60] INTPROP_1; wire [60:0] INTPROP_1;
wire [0:64] INTGEN_1; wire [64:0] INTGEN_1;
wire [0:56] INTPROP_2; wire [56:0] INTPROP_2;
wire [0:64] INTGEN_2; wire [64:0] INTGEN_2;
wire [0:48] INTPROP_3; wire [48:0] INTPROP_3;
wire [0:64] INTGEN_3; wire [64:0] INTGEN_3;
wire [0:32] INTPROP_4; wire [32:0] INTPROP_4;
wire [0:64] INTGEN_4; wire [64:0] INTGEN_4;
DBLC_0_64 U_0 (.PIN(PIN) , .GIN(GIN) , .POUT(INTPROP_0) , .GOUT(INTGEN_0) ); DBLC_0_64 U_0 (.PIN(PIN) , .GIN(GIN) , .POUT(INTPROP_0) , .GOUT(INTGEN_0) );
DBLC_1_64 U_1 (.PIN(INTPROP_0) , .GIN(INTGEN_0) , .POUT(INTPROP_1) , .GOUT(INTGEN_1) ); DBLC_1_64 U_1 (.PIN(INTPROP_0) , .GIN(INTGEN_0) , .POUT(INTPROP_1) , .GOUT(INTGEN_1) );
@ -739,20 +739,20 @@ endmodule // DBLCTREE_64
module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT ); module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT );
input [0:63] OPA; input [63:0] OPA;
input [0:63] OPB; input [63:0] OPB;
input CIN; input CIN;
output [0:63] SUM; output [63:0] SUM;
output COUT; output COUT;
wire [0:63] INTPROP; wire [63:0] INTPROP;
wire [0:64] INTGEN; wire [64:0] INTGEN;
wire [0:0] PBIT; wire [0:0] PBIT;
wire [0:64] CARRY; wire [64:0] CARRY;
PRESTAGE_64 U1 (OPA , OPB , CIN , INTPROP , INTGEN ); PRESTAGE_64 U1 (OPA , OPB , CIN , INTPROP , INTGEN );
DBLCTREE_64 U2 (INTPROP , INTGEN , CARRY , PBIT ); DBLCTREE_64 U2 (INTPROP , INTGEN , CARRY , PBIT );
XORSTAGE_64 U3 (OPA[0:63] , OPB[0:63] , PBIT[0] , CARRY[0:64] , SUM , COUT ); XORSTAGE_64 U3 (OPA[63:0] , OPB[63:0] , PBIT[0] , CARRY[64:0] , SUM , COUT );
endmodule endmodule

View File

@ -30,21 +30,10 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
// Internal nodes // Internal nodes
reg [215:0] shift; // aligned addend from shifter reg [215:0] shift; // aligned addend from shifter
logic zexpsel; // sticky bit of product logic [12:0] tmp;
reg [7:0] i; // temp storage for finding sticky bit
wire [52:0] z1; // Z plus 1
wire [51:0] z2; // Z selected after handling rounds
// Compute sign of aligncntE + 104 to check for shifting too far right
//assign align104 = aligncntE+104;
// Shift addend by alignment count. Generate sticky bits from
// addend on right shifts. Handle special cases of shifting
// by too much.
//***change always @ to always_combs
always_comb always_comb
begin begin
@ -55,32 +44,34 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
killprodE = xzeroE | yzeroE; killprodE = xzeroE | yzeroE;
// d = aligncntE // d = aligncntE
// p = 53 // p = 53
//***try reducing this hardware try getting onw shifter //***try reducing this hardware to use one shifter
if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1 if ($signed(aligncntE) <= $signed(-(13'd105))) begin //d<=-2p+1
//product anchored case with saturated shift //product anchored case with saturated shift
sumshiftE = 163; // 3p+4 sumshiftE = 163; // 3p+4
sumshiftzeroE = 0; sumshiftzeroE = 0;
shift = {1'b1,zman,163'b0} >> sumshiftE; shift = {1'b1,zman,163'b0} >> sumshiftE;
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
//zexpsel = 0;
end else if($signed(aligncntE) <= $signed(2)) begin // -2p+1<d<=2 end else if($signed(aligncntE) <= $signed(13'd2)) begin // -2p+1<d<=2
// product anchored or cancellation // product anchored or cancellation
sumshiftE = 57-aligncntE; // p + 2 - d tmp = 13'd57-aligncntE;
sumshiftE = tmp[8:0]; // p + 2 - d
sumshiftzeroE = 0; sumshiftzeroE = 0;
shift = {~zdenormE,zman,163'b0} >> sumshiftE; shift = {~zdenormE,zman,163'b0} >> sumshiftE;
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
//zexpsel = 0;
end else if ($signed(aligncntE)<=$signed(55)) begin // 2 < d <= p+2 end else if ($signed(aligncntE)<=$signed(13'd55)) begin // 2 < d <= p+2
// addend anchored case // addend anchored case
// used to be 56 \/ somthing doesn'tE seem right too many typos // used to be 56 \/ somthing doesn't seem right too many typos
sumshiftE = 57-aligncntE; tmp = 13'd57-aligncntE;
sumshiftE = tmp[8:0];
sumshiftzeroE = 0; sumshiftzeroE = 0;
shift = {~zdenormE,zman, 163'b0} >> sumshiftE; shift = {~zdenormE,zman, 163'b0} >> sumshiftE;
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
//zexpsel = 1;
end else begin // d >= p+3 end else begin // d >= p+3
// addend anchored case with saturated shift // addend anchored case with saturated shift
sumshiftE = 0; sumshiftE = 0;
@ -89,15 +80,9 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE,
tE = zzeroE ? 0 : {shift[215:52]}; tE = zzeroE ? 0 : {shift[215:52]};
bsE = |(shift[51:0]); bsE = |(shift[51:0]);
killprodE = 1; killprodE = 1;
//ps = 1;
//zexpsel = 1;
// use some behavioral code to find sticky bit. This is really
// done by hardware in the shifter.
//if (aligncntE < 0)
// for (i=0; i<-aligncntE-52; i = i+1)
// bsE = bsE || z2[i];
end end
end end
endmodule endmodule

View File

@ -31,6 +31,11 @@ module kogge_stone (h, c, p, g);
output [15:1] h; output [15:1] h;
output [15:1] c; output [15:1] c;
logic H_1_0,H_2_1,I_2_1,H_3_2,I_3_2,H_4_3,I_4_3,H_5_4,I_5_4,H_6_5,I_6_5,H_7_6,I_7_6,H_8_7,I_8_7,H_9_8,I_9_8,H_10_9
,I_10_9,H_11_10,I_11_10,H_12_11,I_12_11,H_13_12,I_13_12,H_14_13,I_14_13,H_2_0,H_3_0,H_4_1,I_4_1,H_5_2,I_5_2,H_6_3
,I_6_3,H_7_4,I_7_4,H_8_5,I_8_5,H_9_6,I_9_6,H_10_7,I_10_7,H_11_8,I_11_8,H_12_9,I_12_9,H_13_10,I_13_10,H_14_11,I_14_11
,H_4_0,H_5_0,H_6_0,H_7_0,H_8_1,I_8_1,H_9_2,I_9_2,H_10_3,I_10_3,H_11_4,I_11_4,H_12_5,I_12_5,H_13_6,I_13_6,H_14_7
,I_14_7,H_8_0,H_9_0,H_10_0,H_11_0,H_12_0,H_13_0,H_14_0;
// parallel-prefix, Kogge-Stone // parallel-prefix, Kogge-Stone

View File

@ -22,9 +22,6 @@ module booth(xExt, choose, add1, e, pp);
3'b100 : pp = {negx, 1'b0}; // -2 3'b100 : pp = {negx, 1'b0}; // -2
3'b101 : pp = {1'b1, negx}; // -1 3'b101 : pp = {1'b1, negx}; // -1
3'b110 : pp = {1'b1, negx}; // -1 3'b110 : pp = {1'b1, negx}; // -1
// *** <Thomas Fleming> I changed this to fix a lint error. '1 should
// fill the signal with all ones.
// 3'b111 : pp = 55'hfffffffffffffff;
3'b111 : pp = '1; // -0 3'b111 : pp = '1; // -0
endcase endcase

View File

@ -9,7 +9,7 @@ module cla12 (S, CO, X, Y);
output [11:0] S; output [11:0] S;
output CO; output CO;
wire [0:63] A,B,Q; wire [63:0] A,B,Q;//***KEP was 0:63 - changed due to lint warning
wire LOGIC0; wire LOGIC0;
wire CIN; wire CIN;
wire CO_64; wire CO_64;
@ -174,10 +174,11 @@ module cla_sub12 (S, X, Y);
output [11:0] S; output [11:0] S;
wire [0:63] A,B,Q,Bbar; wire [63:0] A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning
wire CO; wire CO;
wire LOGIC0; wire LOGIC0;
wire VDD; wire VDD;
logic CO_12;
assign Bbar = ~B; assign Bbar = ~B;
assign LOGIC0 = 0; assign LOGIC0 = 0;

View File

@ -9,7 +9,7 @@ module cla52 (S, CO, X, Y);
output [51:0] S; output [51:0] S;
output CO; output CO;
wire [0:63] A,B,Q; wire [63:0] A,B,Q;//***KEP was 0:63 - changed due to lint warning
wire LOGIC0; wire LOGIC0;
wire CIN; wire CIN;
wire CO_64; wire CO_64;
@ -211,7 +211,7 @@ module cla_sub52 (S, X, Y);
output [51:0] S; output [51:0] S;
wire [0:63] A,B,Q,Bbar; wire [63:0] A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning
wire LOGIC0; wire LOGIC0;
wire CIN; wire CIN;
wire CO_52; wire CO_52;

View File

@ -9,7 +9,7 @@ module cla64 (S, X, Y, Sub);
input Sub; input Sub;
output [63:0] S; output [63:0] S;
wire CO; wire CO;
wire [0:63] A,B,Q, Bbar; wire [63:0] A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning
DBLCADDER_64_64 U1 (A , Bbar , Sub , Q , CO ); DBLCADDER_64_64 U1 (A , Bbar , Sub , Q , CO );
assign A[0] = X[0]; assign A[0] = X[0];
@ -220,7 +220,7 @@ module cla_sub64 (S, X, Y);
wire CO; wire CO;
wire VDD = 1'b1; wire VDD = 1'b1;
wire [0:63] A,B,Q, Bbar; wire [63:0] A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning
DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO ); DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO );
assign A[0] = X[0]; assign A[0] = X[0];

View File

@ -42,6 +42,8 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
logic [63:0] d2, n2; logic [63:0] d2, n2;
logic [11:0] d3; logic [11:0] d3;
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7, muxr_out;
// Check if exponent is odd for sqrt // Check if exponent is odd for sqrt
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
assign d2 = (exp_odd&op_type) ? {vss,d,10'h0} : {d,11'h0}; assign d2 = (exp_odd&op_type) ? {vss,d,10'h0} : {d,11'h0};

View File

@ -50,7 +50,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
// if exponent is out of bounds // if exponent is out of bounds
assign aeE = xzeroE|yzeroE ? 0 : xexp + yexp -1023; assign aeE = xzeroE|yzeroE ? 0 : {2'b0,xexp} + {2'b0,yexp} - 13'd1023;
assign prodof = (aeE > 2046 && ~aeE[12]); assign prodof = (aeE > 2046 && ~aeE[12]);
@ -61,7 +61,7 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
// is masked by the bypass mux and two 10 bit adder delays. // is masked by the bypass mux and two 10 bit adder delays.
// assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE; // assign aligncnt0 = - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
// assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE}; // assign aligncnt1 = - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
assign aligncntE = zexp -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE}; assign aligncntE = {2'b0,zexp} -aeE - 1 + {12'b0,~xdenormE} + {12'b0,~ydenormE} - {12'b0,~zdenormE};
//assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE; //assign aligncntE = zexp -aeE - 1 + ~xdenormE + ~ydenormE - ~zdenormE;
//assign aligncntE = zexp - aeE;// KEP use all of aeE //assign aligncntE = zexp - aeE;// KEP use all of aeE
@ -87,3 +87,4 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE,
// rounding mode. NaNs are propagated or generated. // rounding mode. NaNs are propagated or generated.
endmodule endmodule

View File

@ -102,6 +102,7 @@ module expgen2(xexp, yexp, zexp,
// A mux selects the early result from other FPU blocks or the // A mux selects the early result from other FPU blocks or the
// normalized FMAC result. Special cases are also detected. // normalized FMAC result. Special cases are also detected.
assign wexp = specialsel ? specialres[10:0] : de[10:0] + expplus1; assign wexp = specialsel ? specialres[10:0] : de[10:0] + {10'b0,expplus1};
endmodule endmodule

View File

@ -3,20 +3,23 @@ module fctrl (
input logic [6:0] Funct7D, input logic [6:0] Funct7D,
input logic [6:0] OpD, input logic [6:0] OpD,
input logic [4:0] Rs2D, input logic [4:0] Rs2D,
input logic [4:0] Rs1D, input logic [2:0] Funct3D,
input logic [2:0] FrmW, input logic [2:0] FRM_REGW,
output logic WriteEnD, output logic IllegalFPUInstrD,
output logic FRegWriteD,
output logic DivSqrtStartD, output logic DivSqrtStartD,
//output logic [2:0] regSelD, //output logic [2:0] regSelD,
output logic [2:0] WriteSelD, output logic [2:0] FResultSelD,
output logic [3:0] OpCtrlD, output logic [3:0] OpCtrlD,
output logic FmtD, output logic FmtD,
output logic [2:0] FrmD,
output logic WriteIntD); output logic WriteIntD);
//precision is taken directly from instruction //precision is taken directly from instruction
assign FmtD = Funct7D[0]; assign FmtD = Funct7D[0];
// *** fix rounding for dynamic rounding
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
//all subsequent logic is based on the table present //all subsequent logic is based on the table present
//in Section 5 of Wally Architecture Specification //in Section 5 of Wally Architecture Specification
@ -29,59 +32,75 @@ module fctrl (
//in case of errors //in case of errors
case(OpD) case(OpD)
//fp instructions sans load //fp instructions sans load
7'b1010011 : begin isFP = 1'b1; isFPLD = 1'b0; end 7'b1010011 : isFP = 1'b1;
7'b1000011 : begin isFP = 1'b1; isFPLD = 1'b0; end 7'b1000011 : isFP = 1'b1;
7'b1000111 : begin isFP = 1'b1; isFPLD = 1'b0; end 7'b1000111 : isFP = 1'b1;
7'b1001011 : begin isFP = 1'b1; isFPLD = 1'b0; end 7'b1001011 : isFP = 1'b1;
7'b1001111 : begin isFP = 1'b1; isFPLD = 1'b0; end 7'b1001111 : isFP = 1'b1;
7'b0100111 : begin isFP = 1'b1; isFPLD = 1'b0; end 7'b0100111 : isFP = 1'b1;
//fp load 7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111
7'b1010011 : begin isFP = 1'b1; isFPLD = 1'b1; end default : isFP = 1'b0;
default : begin isFP = 1'b0; isFPLD = 1'b0; end
endcase endcase
end end
assign WriteEnD = isFP & ~isFPLD;
//useful intermediary signals //useful intermediary signals
// //
//(mult only not supported in current datapath) //(mult only not supported in current datapath)
//set third FMA operand to zero in this case //set third FMA operand to zero in this case
//(or equivalent) //(or equivalent)
logic isAddSub, isFMA, isMult, isDivSqrt, isCvt, isCmp, isFPSTR;
always_comb begin always_comb begin
//checks all but FMA/store/load //checks all but FMA/store/load
if(OpD == 7'b1010011) begin if(OpD == 7'b1010011) begin
case(Funct7D) casez(Funct7D)
//compare //compare
7'b10100?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b1; isFPSTR = 1'b0; end 7'b10100?? : FResultSelD = 3'b001;
//div/sqrt //div/sqrt
7'b0?011?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b1; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b0?011?? : FResultSelD = 3'b000;
//add/sub //add/sub
7'b0000??? : begin isAddSub = 1'b1; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b0000??? : FResultSelD = 3'b100;
//mult //mult
7'b00010?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b1; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b00010?? : FResultSelD = 3'b010;
//convert (not precision) //convert (not precision)
7'b110?0?? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b1; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b110?0?? : FResultSelD = 3'b100;
//convert (precision) //convert (precision)
7'b010000? : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b1; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b010000? : FResultSelD = 3'b100;
//Min/Max
7'b00101?? : FResultSelD = 3'b001;
//sign injection
7'b00100?? : FResultSelD = 3'b011;
//classify //only if funct3 = 001
7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
//output ReadData1
else if (Funct7D[1] == 0) FResultSelD = 3'b111;
//output SrcW
7'b111100? : FResultSelD = 3'b110;
default : FResultSelD = 3'bxxx;
endcase endcase
end end
//FMA/store/load //FMA/store/load
else begin else begin
case(OpD) case(OpD)
//4 FMA instructions //4 FMA instructions
7'b1000011 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b1000011 : FResultSelD = 3'b010;
7'b1000111 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b1000111 : FResultSelD = 3'b010;
7'b1001011 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b1001011 : FResultSelD = 3'b010;
7'b1001111 : begin isAddSub = 1'b0; isFMA = 1'b1; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b0; end 7'b1001111 : FResultSelD = 3'b010;
//store (load already found) //store
7'b0100111 : begin isAddSub = 1'b0; isFMA = 1'b0; isMult = 1'b0; isDivSqrt = 1'b0; isCvt = 1'b0; isCmp = 1'b0; isFPSTR = 1'b1; end 7'b0100111 : FResultSelD = 3'b111;
//load
7'b0000111 : FResultSelD = 3'b111;
default : FResultSelD = 3'bxxx;
endcase endcase
end end
end end
//register is chosen based on operation performed //register is chosen based on operation performed
//---- //----
//write selection is chosen in the same way as //write selection is chosen in the same way as
@ -90,26 +109,21 @@ module fctrl (
// reg/write sel logic and assignment // reg/write sel logic and assignment
// //
// 3'b000 = add/sub/cvt // 3'b000 = div/sqrt
// 3'b001 = sign // 3'b001 = cmp
// 3'b010 = fma // 3'b010 = fma/mult
// 3'b011 = cmp // 3'b011 = sgn inj
// 3'b100 = div/sqrt // 3'b100 = add/sub/cnvt
// 3'b101 = classify
// 3'b110 = output SrcAW
// 3'b111 = output ReadData1
// //
//reg select //reg select
//this value is used enough to be shorthand //this value is used enough to be shorthand
logic isSign;
assign isSign = ~Funct7D[6] & ~Funct7D[5] & Funct7D[4] & ~Funct7D[3] & ~Funct7D[2];
//write select
assign WriteSelD[2] = isDivSqrt & ~isFMA;
assign WriteSelD[1] = isFMA | isCmp;
//AND of Funct7 for sign
assign WriteSelD[0] = isCmp | isSign;
//if op is div/sqrt - start div/sqrt //if op is div/sqrt - start div/sqrt
assign DivSqrtStartD = isDivSqrt & ~isFMA; assign DivSqrtStartD = ~|FResultSelD; // is FResultSelD == 000
//operation control for each fp operation //operation control for each fp operation
//has to be expanded over standard to account for //has to be expanded over standard to account for
@ -126,23 +140,74 @@ module fctrl (
// //
// //
//add/cvt chooses unsigned conversion here
assign OpCtrlD[3] = (isAddSub & Rs2D[0]) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0);
//add/cvt chooses FP/int or int/FP conversion always_comb begin
assign OpCtrlD[2] = (isAddSub & (Funct7D[6] & Funct7D[5] & ~Funct7D[4])) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & 1'b0) | (isSign & 1'b0); IllegalFPUInstrD = 0;
//compare chooses equals case (FResultSelD)
//sign chooses sgnjx // div/sqrt
//add/cvt can chooses between abs/neg functions, but they aren't used in the // fdiv = ???0
//wally-spec // fsqrt = ???1
assign OpCtrlD[1] = (isAddSub & 1'b0) | (isFMA & 1'b0) | (isDivSqrt & 1'b0) | (isCmp & FrmW[2]) | (isSign & FrmW[1]); 3'b000 : OpCtrlD = {3'b0, Funct7D[5]};
//divide chooses between div/sqrt // cmp
//compare chooses between LT and LE // fmin = ?100
//sign chooses between sgnj and sgnjn // fmax = ?101
//add/cvt chooses between add/sub or single-precision conversion // feq = ?010
assign OpCtrlD[0] = (isAddSub & (Funct7D[2] | Funct7D[0])) | (isFMA & 1'b0) | (isDivSqrt & Funct7D[5]) | (isCmp & FrmW[1]) | (isSign & FrmW[0]); // flt = ?001
// fle = ?011
// {?, is min or max, is eq or le, is lt or le}
3'b001 : OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])};
//fma/mult
// fmadd = ?000
// fmsub = ?001
// fnmadd = ?010
// fnmsub = ?011
// fmul = ?100
// {?, is mul, is negative, is sub}
3'b010 : OpCtrlD = {1'b0, OpD[4:2]};
// sgn inj
// fsgnj = ??00
// fsgnjn = ??01
// fsgnjx = ??10
3'b011 : OpCtrlD = {2'b0, Funct3D[1:0]};
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// fcvt.w.s = 0100
// fcvt.wu.s = 0101
// fcvt.s.w = 0110
// fcvt.s.wu = 0111
// fcvt.s.d = 0010
// fcvt.w.d = 1100
// fcvt.wu.d = 1101
// fcvt.d.w = 1110
// fcvt.d.wu = 1111
// fcvt.d.s = 1000
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub }
3'b100 : OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])};
// classify {?, ?, ?, ?}
3'b101 : OpCtrlD = 4'b0;
// output SrcAW
// fmv.w.x = ???0
// fmv.w.d = ???1
3'b110 : OpCtrlD = {3'b0, Funct7D[0]};
// output ReadData1
// flw = ?000
// fld = ?001
// fsw = ?010
// fsd = ?011
// fmv.x.w = ?100
// fmv.d.w = ?101
// {?, is mv, is store, is double or fcvt.d.w}
3'b111 : OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])};
default : begin OpCtrlD = 4'bxxxx; IllegalFPUInstrD = isFP; end
endcase
end
//write to integer source if conv to int occurs //write to integer source if conv to int occurs
//AND of Funct7 for int results //AND of Funct7 for int results
assign WriteIntD = isCvt & (Funct7D[6] & Funct7D[5] & ~Funct7D[4] & ~Funct7D[3] & ~Funct7D[2] & ~Funct7D[1]); // is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv
assign WriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b001)&OpD[6]);
// if not writing to int reg and not a store function and not move
assign FRegWriteD = ~WriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]);
endmodule endmodule

View File

@ -21,7 +21,7 @@ module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE);
output logic prodinfE; // X*Y larger than max possible output logic prodinfE; // X*Y larger than max possible
// If any input is NaN, propagate the NaN // If any input logic is NaN, propagate the NaN
assign nanE = xnanE || ynanE || znanE; assign nanE = xnanE || ynanE || znanE;

View File

@ -55,8 +55,8 @@ logic suminf;
assign FmaFlagsM[2] = suminf && ~inf; assign FmaFlagsM[2] = suminf && ~inf;
// Set the underflow flag for the following cases: // Set the underflow flag for the following cases:
// 1) Any input is denormalized // 1) Any input logic is denormalized
// 2) Output would be denormalized or smaller // 2) output logic would be denormalized or smaller
assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM)); assign FmaFlagsM[1] = (sumuf && ~inf && ~prodinfM && ~nanM) || (killprodM & zzeroM & ~(yzeroM | xzeroM));
@ -70,7 +70,7 @@ logic suminf;
// Set invalid flag for following cases: // Set invalid flag for following cases:
// 1) Inf - Inf // 1) Inf - Inf
// 2) 0 * Inf // 2) 0 * Inf
// 3) Output = NaN (this is not part of the IEEE spec, only 486 proj) // 3) output logic = NaN (this is not part of the IEEE spec, only 486 proj)
assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) || assign FmaFlagsM[4] = (xinfM || yinfM || prodinfM) && zinfM && (xsign ^ ysign ^ zsign) ||
xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above xzeroM && yinfM || yzeroM && xinfM;// KEP remove case 3) above

View File

@ -34,7 +34,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE, , xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
xinfE, yinfE, zinfE, nanE, prodinfE); xinfE, yinfE, zinfE, nanE, prodinfE);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
//***clean up code, comment, fix names, and c3f000200003fffe * 0000000000000001 + 001ffffffffffffe error
input logic [63:0] ReadData1E; // input 1 input logic [63:0] ReadData1E; // input 1
input logic [63:0] ReadData2E; // input 2 input logic [63:0] ReadData2E; // input 2
input logic [63:0] ReadData3E; // input 3 input logic [63:0] ReadData3E; // input 3
@ -42,7 +42,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
output logic [12:0] aligncntE; // status flags output logic [12:0] aligncntE; // status flags
output logic [105:0] rE; // one result of partial product sum output logic [105:0] rE; // one result of partial product sum
output logic [105:0] sE; // other result of partial products output logic [105:0] sE; // other result of partial products
output logic [163:0] tE; // output of alignment shifter output logic [163:0] tE; // output logic of alignment shifter
output logic [12:0] aeE; // multiplier expoent output logic [12:0] aeE; // multiplier expoent
output logic bsE; // sticky bit of addend output logic bsE; // sticky bit of addend
output logic killprodE; // ReadData3E >> product output logic killprodE; // ReadData3E >> product
@ -65,7 +65,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
// Internal nodes // Internal nodes
// output [12:0] aligncntE; // shift count for alignment // output logic [12:0] aligncntE; // shift count for alignment
logic prodof; // ReadData1E*ReadData2E out of range logic prodof; // ReadData1E*ReadData2E out of range
@ -95,7 +95,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE,
special special(.*); special special(.*);
// Instantiate control output // Instantiate control output logic
flag1 flag1(.*); flag1 flag1(.*);

View File

@ -15,13 +15,13 @@
// normalize Normalization shifter // normalize Normalization shifter
// round Rounding of result // round Rounding of result
// exception Handles exceptional cases // exception Handles exceptional cases
// bypass Handles bypass of result to ReadData1M or ReadData3M inputs // bypass Handles bypass of result to ReadData1M or ReadData3M input logics
// sign One bit sign handling block // sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.) // special Catch special cases (input logics = 0 / infinity / etc.)
// //
// The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by // The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to // RN, RZ, RM, or RP. The result is optionally bypassed back to
// the ReadData1M or ReadData3M inputs for use on the next cycle. In addition, four signals // the ReadData1M or ReadData3M input logics for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates // are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software; // an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IMMM flags. // the other three signals are IMMM flags.
@ -39,9 +39,9 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
); );
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
input logic [63:0] ReadData1M; // input 1 input logic [63:0] ReadData1M; // input logic 1
input logic [63:0] ReadData2M; // input 2 input logic [63:0] ReadData2M; // input logic 2
input logic [63:0] ReadData3M; // input 3 input logic [63:0] ReadData3M; // input logic 3
input logic [2:0] FrmM; // Rounding mode input logic [2:0] FrmM; // Rounding mode
input logic [12:0] aligncntM; // status flags input logic [12:0] aligncntM; // status flags
input logic [105:0] rM; // one result of partial product sum input logic [105:0] rM; // one result of partial product sum
@ -67,9 +67,12 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM,
input logic nanM; input logic nanM;
input logic [8:0] sumshiftM; input logic [8:0] sumshiftM;
input logic sumshiftzeroM; input logic sumshiftzeroM;
output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M
output logic [4:0] FmaFlagsM; // status flags output logic [4:0] FmaFlagsM; // status flags
// Internal nodes // Internal nodes
logic [163:0] sum; // output of carry prop adder logic [163:0] sum; // output of carry prop adder
logic [53:0] v; // normalized sum, R, S bits logic [53:0] v; // normalized sum, R, S bits

View File

@ -84,13 +84,14 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
wire [127:0] regr_out; wire [127:0] regr_out;
wire [2:0] sel_muxa, sel_muxb; wire [2:0] sel_muxa, sel_muxb;
wire sel_muxr; wire sel_muxr;
wire load_rega, load_regb, load_regc, load_regd, load_regr; wire load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
wire donev, sel_muxrv, sel_muxsv; wire donev, sel_muxrv, sel_muxsv;
wire [1:0] sel_muxav, sel_muxbv; wire [1:0] sel_muxav, sel_muxbv;
wire load_regav, load_regbv, load_regcv; wire load_regav, load_regbv, load_regcv;
wire load_regrv, load_regsv; wire load_regrv, load_regsv;
logic exp_cout1, exp_cout2, exp_odd, open;
// Convert the input operands to their appropriate forms based on // Convert the input operands to their appropriate forms based on
// the original operands, the DivOpType , and their precision DivP. // the original operands, the DivOpType , and their precision DivP.
// Single precision inputs are converted to double precision // Single precision inputs are converted to double precision
@ -138,7 +139,7 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
// FSM : control divider // FSM : control divider
fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd, fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
clk, reset, DivStart, error, DivOpType); clk, reset, DivStart, DivOpType);
// Round the mantissa to a 52-bit value, with the leading one // Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding units also handles special cases and // removed. The rounding units also handles special cases and
@ -191,6 +192,9 @@ module brent_kung (c, p, g);
input [13:0] g; input [13:0] g;
output [14:1] c; output [14:1] c;
logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
// parallel-prefix, Brent-Kung // parallel-prefix, Brent-Kung
// Stage 1: Generates G/DivP pairs that span 1 bits // Stage 1: Generates G/DivP pairs that span 1 bits

View File

@ -1,19 +1,22 @@
`include "wally-config.vh" `include "wally-config.vh"
// `include "../../config/rv64icfd/wally-config.vh" //debug
module fpu ( module fpu (
//input logic [2:0] FrmD, //input logic [2:0] FrmD,
input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic reset, input logic reset,
//input logic clear, // *** what is this used for? //input logic clear, // *** not being used anywhere
input logic clk, input logic clk,
input logic [31:0] InstrD, input logic [31:0] InstrD,
input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
output logic [4:0] SetFflagsM, output logic [4:0] SetFflagsM,
output logic [31:0] FSROutW, output logic [31:0] FSROutW,
output logic DivSqrtDoneE, output logic DivSqrtDoneE,
output logic FInvalInstrD, output logic IllegalFPUInstrD,
output logic [`XLEN-1:0] FPUResultW); output logic [`XLEN-1:0] FPUResultW);
//NOTE: //NOTE:
@ -45,12 +48,12 @@ module fpu (
localparam PipeEnable = 1'b1; localparam PipeEnable = 1'b1;
always_comb begin always_comb begin
PipeEnableDE = PipeEnable; PipeEnableDE = StallE;
PipeEnableEM = PipeEnable; PipeEnableEM = StallM;
PipeEnableMW = PipeEnable; PipeEnableMW = StallW;
PipeClearDE = PipeClear; PipeClearDE = FlushE;
PipeClearEM = PipeClear; PipeClearEM = FlushM;
PipeClearMW = PipeClear; PipeClearMW = FlushW;
end end
@ -63,33 +66,33 @@ module fpu (
// //
//wally-spec D stage control logic signal instantiation //wally-spec D stage control logic signal instantiation
logic IllegalFPUInstrFaultD;
logic FRegWriteD; logic FRegWriteD;
logic [2:0] FResultSelD; logic [2:0] FResultSelD;
logic [2:0] FrmD; logic [2:0] FrmD;
logic PD; logic FmtD;
logic DivSqrtStartD; logic DivSqrtStartD;
logic [3:0] OpCtrlD; logic [3:0] OpCtrlD;
logic WriteIntD; logic WriteIntD;
//top-level controller for FPU //top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Rs1D(InstrD[19:15]), .FrmW(InstrD[14:12]), .WriteEnD(FRegWriteD), .WriteSelD(FResultSelD), .FmtD(PD), .*); fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
//instantiation of D stage regfile signals (includes some W stage signals //instantiation of D stage regfile signals (includes some W stage signals
//for easy reference) //for easy reference)
logic [2:0] FrmW; logic [2:0] FrmW;
logic WriteEnW; logic FmtW;
logic FRegWriteW;
logic [4:0] RdW, Rs1D, Rs2D, Rs3D; logic [4:0] RdW, Rs1D, Rs2D, Rs3D;
logic [`XLEN-1:0] WriteDataW; logic [`XLEN-1:0] WriteDataW;
logic [63:0] FPUResultDirW;
logic [`XLEN-1:0] ReadData1D, ReadData2D, ReadData3D; logic [`XLEN-1:0] ReadData1D, ReadData2D, ReadData3D;
//regfile instantiation //regfile instantiation
freg3adr fpregfile (FrmW, reset, PipeClear, clk, RdW, WriteEnW, Rs1D, Rs2D, Rs3D, WriteDataW, ReadData1D, ReadData2D, ReadData3D); freg3adr fpregfile (FmtW, reset, PipeClear, clk, RdW, FRegWriteW, InstrD[19:15], InstrD[24:20], InstrD[31:27], FPUResultDirW, ReadData1D, ReadData2D, ReadData3D);
always_comb begin
FrmW = InstrD[14:12];
end
//always_comb begin
// FrmW = InstrD[14:12];
//end
// //
//END DECODE STAGE //END DECODE STAGE
//######################################### //#########################################
@ -102,7 +105,7 @@ module fpu (
logic FRegWriteE; logic FRegWriteE;
logic [2:0] FResultSelE; logic [2:0] FResultSelE;
logic [2:0] FrmE; logic [2:0] FrmE;
logic PE; logic FmtE;
logic DivSqrtStartE; logic DivSqrtStartE;
logic [3:0] OpCtrlE; logic [3:0] OpCtrlE;
@ -187,9 +190,10 @@ module fpu (
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE); flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, PD, PE); flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
flopenrc #(4) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE); flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
flopenrc #(1) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE); flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
// //
//END D/E PIPE //END D/E PIPE
@ -205,10 +209,10 @@ module fpu (
fpdiv fpdivsqrt (.*); fpdiv fpdivsqrt (.*);
//first of two-stage instance of floating-point add/cvt unit //first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, AddOp1E, AddOp2E, AddRmE, AddOpTypeE, AddPE, AddOvEnE, AddUnEnE); fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, ReadData1E, ReadData2E, FrmE, OpCtrlE, FmtE);
//first of two-stage instance of floating-point comparator //first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, CmpOp1E, CmpOp2E, CmpSelE); fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, ReadData1E, ReadData2E, OpCtrlE[1:0]);
//first and only instance of floating-point sign converter //first and only instance of floating-point sign converter
fpusgn fpsgn (.*); fpusgn fpsgn (.*);
@ -221,33 +225,33 @@ module fpu (
//truncate to 64 bits //truncate to 64 bits
//(causes warning during compilation - case never reached) //(causes warning during compilation - case never reached)
if(`XLEN > 64) begin // if(`XLEN > 64) begin // ***KEP this isn't used and it causes a lint error
DivOp1 <= ReadData1E[`XLEN-1:`XLEN-64]; // DivOp1 = ReadData1E[`XLEN-1:`XLEN-64];
DivOp2 <= ReadData2E[`XLEN-1:`XLEN-64]; // DivOp2 = ReadData2E[`XLEN-1:`XLEN-64];
AddOp1E <= ReadData1E[`XLEN-1:`XLEN-64]; // AddOp1E = ReadData1E[`XLEN-1:`XLEN-64];
AddOp2E <= ReadData2E[`XLEN-1:`XLEN-64]; // AddOp2E = ReadData2E[`XLEN-1:`XLEN-64];
CmpOp1E <= ReadData1E[`XLEN-1:`XLEN-64]; // CmpOp1E = ReadData1E[`XLEN-1:`XLEN-64];
CmpOp2E <= ReadData2E[`XLEN-1:`XLEN-64]; // CmpOp2E = ReadData2E[`XLEN-1:`XLEN-64];
SgnOp1E <= ReadData1E[`XLEN-1:`XLEN-64]; // SgnOp1E = ReadData1E[`XLEN-1:`XLEN-64];
SgnOp2E <= ReadData2E[`XLEN-1:`XLEN-64]; // SgnOp2E = ReadData2E[`XLEN-1:`XLEN-64];
end // end
//zero extend to 64 bits // //zero extend to 64 bits
else begin // else begin
DivOp1 <= {ReadData1E,{64-`XLEN{1'b0}}}; // DivOp1 = {ReadData1E,{64-`XLEN{1'b0}}};
DivOp2 <= {ReadData2E,{64-`XLEN{1'b0}}}; // DivOp2 = {ReadData2E,{64-`XLEN{1'b0}}};
AddOp1E <= {ReadData1E,{64-`XLEN{1'b0}}}; // AddOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
AddOp2E <= {ReadData2E,{64-`XLEN{1'b0}}}; // AddOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
CmpOp1E <= {ReadData1E,{64-`XLEN{1'b0}}}; // CmpOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
CmpOp2E <= {ReadData2E,{64-`XLEN{1'b0}}}; // CmpOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
SgnOp1E <= {ReadData1E,{64-`XLEN{1'b0}}}; // SgnOp1E = {ReadData1E,{64-`XLEN{1'b0}}};
SgnOp2E <= {ReadData2E,{64-`XLEN{1'b0}}}; // SgnOp2E = {ReadData2E,{64-`XLEN{1'b0}}};
end // end
//assign op codes //assign op codes
AddOpTypeE[3:0] <= OpCtrlE[3:0]; AddOpTypeE[3:0] = OpCtrlE[3:0];
CmpSelE[1:0] <= OpCtrlE[1:0]; CmpSelE[1:0] = OpCtrlE[1:0];
DivOpType <= OpCtrlE[0]; DivOpType = OpCtrlE[0];
SgnOpCodeE[1:0] <= OpCtrlE[1:0]; SgnOpCodeE[1:0] = OpCtrlE[1:0];
end end
@ -266,7 +270,7 @@ module fpu (
logic FRegWriteM; logic FRegWriteM;
logic [2:0] FResultSelM; logic [2:0] FResultSelM;
logic [2:0] FrmM; logic [2:0] FrmM;
logic PM; logic FmtM;
logic [3:0] OpCtrlM; logic [3:0] OpCtrlM;
//instantiate M stage FMA signals here ***rename fma signals and resize for XLEN //instantiate M stage FMA signals here ***rename fma signals and resize for XLEN
@ -340,17 +344,17 @@ module fpu (
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM);
flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM);
flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM);
flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM);
flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM);
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM);
flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM);
flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM);
@ -414,8 +418,9 @@ module fpu (
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM); flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM);
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, PE, PM); flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
flopenrc #(4) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM); flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
// //
//END E/M PIPE //END E/M PIPE
@ -443,9 +448,7 @@ module fpu (
// //
//wally-spec W stage control logic signal instantiation //wally-spec W stage control logic signal instantiation
logic FRegWriteW;
logic [2:0] FResultSelW; logic [2:0] FResultSelW;
logic PW;
//instantiate W stage fma signals here //instantiate W stage fma signals here
logic [63:0] FmaResultW; logic [63:0] FmaResultW;
@ -470,9 +473,14 @@ module fpu (
logic AddDenormW; logic AddDenormW;
//instantiation of W stage cmp signals //instantiation of W stage cmp signals
logic [63:0] CmpResultW;
logic CmpInvalidW; logic CmpInvalidW;
logic [1:0] CmpFCCW; logic [1:0] CmpFCCW;
//instantiation of W stage classify signals
logic [63:0] ClassResultW;
logic [4:0] ClassFlagsW;
//***************** //*****************
//fma M/W pipe registers //fma M/W pipe registers
//***************** //*****************
@ -510,7 +518,9 @@ module fpu (
//***************** //*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW); flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW);
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, PM, PW); flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
////END M/W PIPE ////END M/W PIPE
//***************************************** //*****************************************
@ -527,21 +537,61 @@ module fpu (
//set to cmp flags //set to cmp flags
//iff bit one is low - if bit zero is active set to add/cvt flags - otherwise //iff bit one is low - if bit zero is active set to add/cvt flags - otherwise
//set to div/sqrt flags //set to div/sqrt flags
assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : ( //assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
(FResultSelW[1]) ? // (FResultSelW[1]) ?
( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) ) // ( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) )
: ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) ) // : ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) )
); // );
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUFlagsW = DivFlagsW;
// cmp
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
//fma/mult
3'b010 : FPUFlagsW = FmaFlagsW;
// sgn inj
3'b011 : FPUFlagsW = SgnFlagsW;
// add/sub/cnvt
3'b100 : FPUFlagsW = AddFlagsW;
// classify
3'b101 : FPUFlagsW = ClassFlagsW;
// output SrcAW
3'b110 : FPUFlagsW = 5'b0;
// output ReadData1
3'b111 : FPUFlagsW = 5'b0;
default : FPUFlagsW = 5'bxxxxx;
endcase
end
//result mux via in-line ternaries //result mux via in-line ternaries
logic [63:0] FPUResultDirW;
//this uses the same logic as for flag signals //this uses the same logic as for flag signals
assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : ( //assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
(FResultSelW[1]) ? // (FResultSelW[1]) ?
( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) ) // ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) )
: ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) ) // : ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) )
); // );
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUResultDirW = DivResultW;
// cmp
3'b001 : FPUResultDirW = CmpResultW;
//fma/mult
3'b010 : FPUResultDirW = FmaResultW;
// sgn inj
3'b011 : FPUResultDirW = SgnResultW;
// add/sub/cnvt
3'b100 : FPUResultDirW = AddResultW;
// classify
3'b101 : FPUResultDirW = ClassResultW;
// output SrcAW
3'b110 : FPUResultDirW = SrcAW;
// output ReadData1
3'b111 : FPUResultDirW = ReadData1W;
default : FPUResultDirW = {64{1'bx}};
endcase
end
//interface between XLEN size datapath and double-precision sized //interface between XLEN size datapath and double-precision sized
//floating-point results //floating-point results
// //
@ -555,11 +605,12 @@ module fpu (
// Repetition multiplier must be constant. // Repetition multiplier must be constant.
//if(`XLEN > 64) begin //if(`XLEN > 64) begin
// FPUResultW <= {FPUResultDirW,{XLENDIFF{1'b0}}}; // FPUResultW = {FPUResultDirW,{XLENDIFF{1'b0}}};
//end //end
//truncate //truncate
//else begin //else begin
FPUResultW <= FPUResultDirW[63:64-`XLEN]; FPUResultW = FPUResultDirW[63:64-`XLEN];
SetFflagsM = FPUFlagsW;
//end //end
end end

View File

@ -27,18 +27,16 @@
// //
module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin, OvEn, UnEn); module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin);
input [63:0] op1; // 1st input operand (A) input logic [63:0] op1; // 1st input operand (A)
input [63:0] op2; // 2nd input operand (B) input logic [63:0] op2; // 2nd input operand (B)
input [2:0] rm; // Rounding mode - specify values input logic [2:0] rm; // Rounding mode - specify values
input [3:0] op_type; // Function opcode input logic [3:0] op_type; // Function opcode
input Pin; // Result Precision (0 for double, 1 for single) input logic Pin; // Result Precision (1 for double, 0 for single)
input OvEn; // Overflow trap enabled
input UnEn; // Underflow trap enabled
wire P; wire P;
assign P = Pin | op_type[2]; assign P = ~Pin | op_type[2];
wire [63:0] IntValue; wire [63:0] IntValue;
wire [11:0] exp1, exp2; wire [11:0] exp1, exp2;
@ -56,23 +54,23 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
wire zeroB; wire zeroB;
wire [5:0] align_shift; wire [5:0] align_shift;
output [63:0] Float1; output logic [63:0] Float1;
output [63:0] Float2; output logic [63:0] Float2;
output [10:0] exponent; output logic [10:0] exponent;
output [10:0] exponent_postsum; output logic [10:0] exponent_postsum;
output [10:0] exp1_denorm, exp2_denorm; output logic [11:0] exp1_denorm, exp2_denorm;//KEP used to be [10:0]
output [63:0] sum, sum_tc; output logic [63:0] sum, sum_tc;
output [3:0] sel_inv; output logic [3:0] sel_inv;
output corr_sign; output logic corr_sign;
output signA; output logic signA;
output op1_Norm, op2_Norm; output logic op1_Norm, op2_Norm;
output opA_Norm, opB_Norm; output logic opA_Norm, opB_Norm;
output Invalid; output logic Invalid;
output DenormIn; output logic DenormIn;
// output exp_valid; // output logic exp_valid;
output convert; output logic convert;
output swap; output logic swap;
output normal_overflow; output logic normal_overflow;
wire [5:0] ZP_mantissaA; wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB; wire [5:0] ZP_mantissaB;
wire ZV_mantissaA; wire ZV_mantissaA;
@ -129,15 +127,15 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB); lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
// Denormalized exponents created by subtracting the leading zeroes from the original exponents // Denormalized exponents created by subtracting the leading zeroes from the original exponents
assign exp1_denorm = swap ? (exp1 - ZP_mantissaB) : (exp1 - ZP_mantissaA); assign exp1_denorm = swap ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
assign exp2_denorm = swap ? (exp2 - ZP_mantissaA) : (exp2 - ZP_mantissaB); assign exp2_denorm = swap ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
// Determine the alignment shift and limit it to 63. If any bit from // Determine the alignment shift and limit it to 63. If any bit from
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones. // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
assign exp_shift = swap ? exp_diff2 : exp_diff1; assign exp_shift = swap ? exp_diff2 : exp_diff1;
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9] assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
| exp_shift[8] | exp_shift[7] | exp_shift[6]; | exp_shift[8] | exp_shift[7] | exp_shift[6];
assign align_shift = exp_shift | {6{exp_gt63}}; assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
// Unpack the 52-bit mantissas to 57-bit numbers of the form. // Unpack the 52-bit mantissas to 57-bit numbers of the form.
// 001.M[51]M[50] ... M[1]M[0]00 // 001.M[51]M[50] ... M[1]M[0]00
@ -193,7 +191,8 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
// Finds normal underflow result to determine whether to round final exponent down // Finds normal underflow result to determine whether to round final exponent down
assign normal_overflow = (DenormIn & (sum == 16'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]); //***KEP used to be (sum == 16'h0) I am unsure what it's supposed to be
assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
endmodule // fpadd endmodule // fpadd

View File

@ -27,7 +27,7 @@
// //
module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM); module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
input [63:0] AddOp1M; // 1st input operand (A) input [63:0] AddOp1M; // 1st input operand (A)
input [63:0] AddOp2M; // 2nd input operand (B) input [63:0] AddOp2M; // 2nd input operand (B)
@ -51,7 +51,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
input AddCorrSignM; input AddCorrSignM;
input AddConvertM; input AddConvertM;
input AddSwapM; input AddSwapM;
input AddNormOvflowM; // input AddNormOvflowM;
output [63:0] AddResultM; // Result of operation output [63:0] AddResultM; // Result of operation
output [4:0] AddFlagsM; // IEEE exception flags output [4:0] AddFlagsM; // IEEE exception flags
@ -80,6 +80,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
wire Float2_sum_tc_comp; wire Float2_sum_tc_comp;
wire normal_underflow; wire normal_underflow;
wire [63:0] sum_corr; wire [63:0] sum_corr;
logic AddNormOvflowM;
//AddExponentM value pre-rounding with considerations for denormalized //AddExponentM value pre-rounding with considerations for denormalized
//cases/conversion cases //cases/conversion cases
@ -116,7 +117,8 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM)); ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
// Finds normal underflow result to determine whether to round final AddExponentM down // Finds normal underflow result to determine whether to round final AddExponentM down
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 16'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]); //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
// Leading-Zero Detector. Determine the size of the shift needed for // Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is // normalization. If sum_corrected is all zeros, the exp_valid is

View File

@ -1,8 +1,9 @@
`include "wally-config.vh" `include "wally-config.vh"
// `include "../../config/rv64icfd/wally-config.vh" //debug
module freg1adr ( module freg1adr (
input logic [2:0] frm, input logic FmtW,
input logic reset, input logic reset,
input logic clear, input logic clear,
input logic clk, input logic clk,
@ -13,7 +14,7 @@ module freg1adr (
output logic [`XLEN-1:0] readData); output logic [`XLEN-1:0] readData);
//note - not word aligning based on precision of //note - not word aligning based on precision of
//operation (frm) //operation (FmtW)
//reg number should remain static, but it doesn't hurt //reg number should remain static, but it doesn't hurt
//to parameterize //to parameterize
@ -139,7 +140,7 @@ endmodule
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
module freg2adr ( module freg2adr (
input logic [2:0] frm, input logic FmtW,
input logic reset, input logic reset,
input logic clear, input logic clear,
input logic clk, input logic clk,
@ -152,7 +153,7 @@ module freg2adr (
output logic [`XLEN-1:0] readData2); output logic [`XLEN-1:0] readData2);
//note - not word aligning based on precision of //note - not word aligning based on precision of
//operation (frm) //operation (FmtW)
//reg number should remain static, but it doesn't hurt //reg number should remain static, but it doesn't hurt
//to parameterize //to parameterize
@ -310,7 +311,7 @@ endmodule
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
module freg3adr ( module freg3adr (
input logic [2:0] frm, input logic FmtW,
input logic reset, input logic reset,
input logic clear, input logic clear,
input logic clk, input logic clk,
@ -325,7 +326,7 @@ module freg3adr (
output logic [`XLEN-1:0] readData3); output logic [`XLEN-1:0] readData3);
//note - not word aligning based on precision of //note - not word aligning based on precision of
//operation (frm) //operation (FmtW)
//reg number should remain static, but it doesn't hurt //reg number should remain static, but it doesn't hurt
//to parameterize //to parameterize

View File

@ -1,12 +1,12 @@
module fsm (done, load_rega, load_regb, load_regc, module fsm (done, load_rega, load_regb, load_regc,
load_regd, load_regr, load_regs, load_regd, load_regr, load_regs,
sel_muxa, sel_muxb, sel_muxr, sel_muxa, sel_muxb, sel_muxr,
clk, reset, start, error, op_type); clk, reset, start, op_type);
input clk; input clk;
input reset; input reset;
input start; input start;
input error; // input error;
input op_type; input op_type;
output done; output done;
@ -50,9 +50,9 @@ module fsm (done, load_rega, load_regb, load_regc,
always @(posedge clk) always @(posedge clk)
begin begin
if(reset==1'b1) if(reset==1'b1)
CURRENT_STATE<=S0; CURRENT_STATE=S0;
else else
CURRENT_STATE<=NEXT_STATE; CURRENT_STATE=NEXT_STATE;
end end
always @(*) always @(*)
@ -72,7 +72,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S0; NEXT_STATE = S0;
end end
else if (start==1'b1 && op_type==1'b0) else if (start==1'b1 && op_type==1'b0)
begin begin
@ -86,7 +86,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S1; NEXT_STATE = S1;
end // if (start==1'b1 && op_type==1'b0) end // if (start==1'b1 && op_type==1'b0)
else if (start==1'b1 && op_type==1'b1) else if (start==1'b1 && op_type==1'b1)
begin begin
@ -100,7 +100,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S13; NEXT_STATE = S13;
end end
end // case: S0 end // case: S0
S1: S1:
@ -115,7 +115,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S2; NEXT_STATE = S2;
end end
S2: // iteration 1 S2: // iteration 1
begin begin
@ -129,7 +129,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S3; NEXT_STATE = S3;
end end
S3: S3:
begin begin
@ -143,7 +143,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S4; NEXT_STATE = S4;
end end
S4: // iteration 2 S4: // iteration 2
begin begin
@ -157,7 +157,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S5; NEXT_STATE = S5;
end end
S5: S5:
begin begin
@ -171,7 +171,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; // add sel_muxr = 1'b0; // add
NEXT_STATE <= S6; NEXT_STATE = S6;
end end
S6: // iteration 3 S6: // iteration 3
begin begin
@ -185,7 +185,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S8; NEXT_STATE = S8;
end end
S7: S7:
begin begin
@ -199,7 +199,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S8; NEXT_STATE = S8;
end // case: S7 end // case: S7
S8: // q,qm,qp S8: // q,qm,qp
begin begin
@ -213,7 +213,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S9; NEXT_STATE = S9;
end end
S9: // rem S9: // rem
begin begin
@ -227,7 +227,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE <= S10; NEXT_STATE = S10;
end end
S10: // done S10: // done
begin begin
@ -241,7 +241,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S0; NEXT_STATE = S0;
end end
S13: // start of sqrt path S13: // start of sqrt path
begin begin
@ -255,7 +255,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S14; NEXT_STATE = S14;
end end
S14: S14:
begin begin
@ -269,7 +269,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b100; sel_muxb = 3'b100;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S15; NEXT_STATE = S15;
end end
S15: // iteration 1 S15: // iteration 1
begin begin
@ -283,7 +283,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S16; NEXT_STATE = S16;
end end
S16: S16:
begin begin
@ -297,7 +297,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S17; NEXT_STATE = S17;
end end
S17: S17:
begin begin
@ -311,7 +311,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S18; NEXT_STATE = S18;
end end
S18: // iteration 2 S18: // iteration 2
begin begin
@ -325,7 +325,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S19; NEXT_STATE = S19;
end end
S19: S19:
begin begin
@ -339,7 +339,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S20; NEXT_STATE = S20;
end end
S20: S20:
begin begin
@ -353,7 +353,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S21; NEXT_STATE = S21;
end end
S21: // iteration 3 S21: // iteration 3
begin begin
@ -367,7 +367,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S22; NEXT_STATE = S22;
end end
S22: S22:
begin begin
@ -381,7 +381,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S23; NEXT_STATE = S23;
end end
S23: S23:
begin begin
@ -395,7 +395,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S24; NEXT_STATE = S24;
end end
S24: // q,qm,qp S24: // q,qm,qp
begin begin
@ -409,7 +409,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S25; NEXT_STATE = S25;
end end
S25: // rem S25: // rem
begin begin
@ -423,7 +423,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b110; sel_muxb = 3'b110;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE <= S26; NEXT_STATE = S26;
end end
S26: // done S26: // done
begin begin
@ -437,7 +437,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S0; NEXT_STATE = S0;
end end
default: default:
begin begin
@ -451,7 +451,7 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE <= S0; NEXT_STATE = S0;
end end
endcase // case(CURRENT_STATE) endcase // case(CURRENT_STATE)
end // always @ (CURRENT_STATE or X) end // always @ (CURRENT_STATE or X)

View File

@ -31,6 +31,56 @@ module ladner_fischer128 (c, p, g);
output [128:1] c; output [128:1] c;
logic G_1_0, G_3_2, P_3_2, G_5_4, P_5_4, G_7_6, P_7_6, G_9_8, P_9_8, G_11_10, P_11_10, G_13_12
, P_13_12, G_15_14, P_15_14, G_17_16, P_17_16, G_19_18, P_19_18, G_21_20, P_21_20, G_23_22
, P_23_22, G_25_24, P_25_24, G_27_26, P_27_26, G_29_28, P_29_28, G_31_30, P_31_30, G_33_32
, P_33_32, G_35_34, P_35_34, G_37_36, P_37_36, G_39_38, P_39_38, G_41_40, P_41_40, G_43_42
, P_43_42, G_45_44, P_45_44, G_47_46, P_47_46, G_49_48, P_49_48, G_51_50, P_51_50, G_53_52
, P_53_52, G_55_54, P_55_54, G_57_56, P_57_56, G_59_58, P_59_58, G_61_60, P_61_60, G_63_62
, P_63_62, G_65_64, P_65_64, G_67_66, P_67_66, G_69_68, P_69_68, G_71_70, P_71_70, G_73_72
, P_73_72, G_75_74, P_75_74, G_77_76, P_77_76, G_79_78, P_79_78, G_81_80, P_81_80, G_83_82
, P_83_82, G_85_84, P_85_84, G_87_86, P_87_86, G_89_88, P_89_88, G_91_90, P_91_90, G_93_92
, P_93_92, G_95_94, P_95_94, G_97_96, P_97_96, G_99_98, P_99_98, G_101_100, P_101_100, G_103_102
, P_103_102, G_105_104, P_105_104, G_107_106, P_107_106, G_109_108, P_109_108, G_111_110, P_111_110
, G_113_112, P_113_112, G_115_114, P_115_114, G_117_116, P_117_116, G_119_118, P_119_118, G_121_120
, P_121_120, G_123_122, P_123_122, G_125_124, P_125_124, G_127_126, P_127_126, G_3_0, G_7_4, P_7_4
, G_11_8, P_11_8, G_15_12, P_15_12, G_19_16, P_19_16, G_23_20, P_23_20, G_27_24, P_27_24, G_31_28
, P_31_28, G_35_32, P_35_32, G_39_36, P_39_36, G_43_40, P_43_40, G_47_44, P_47_44, G_51_48, P_51_48
, G_55_52, P_55_52, G_59_56, P_59_56, G_63_60, P_63_60, G_67_64, P_67_64, G_71_68, P_71_68, G_75_72
, P_75_72, G_79_76, P_79_76, G_83_80, P_83_80, G_87_84, P_87_84, G_91_88, P_91_88, G_95_92, P_95_92
, G_99_96, P_99_96, G_103_100, P_103_100, G_107_104, P_107_104, G_111_108, P_111_108, G_115_112
, P_115_112, G_119_116, P_119_116, G_123_120, P_123_120, G_127_124, P_127_124, G_5_0, G_7_0, G_13_8
, P_13_8, G_15_8, P_15_8, G_21_16, P_21_16, G_23_16, P_23_16, G_29_24, P_29_24, G_31_24, P_31_24
, G_37_32, P_37_32, G_39_32, P_39_32, G_45_40, P_45_40, G_47_40, P_47_40, G_53_48, P_53_48, G_55_48
, P_55_48, G_61_56, P_61_56, G_63_56, P_63_56, G_69_64, P_69_64, G_71_64, P_71_64, G_77_72, P_77_72
, G_79_72, P_79_72, G_85_80, P_85_80, G_87_80, P_87_80, G_93_88, P_93_88, G_95_88, P_95_88, G_101_96
, P_101_96, G_103_96, P_103_96, G_109_104, P_109_104, G_111_104, P_111_104, G_117_112, P_117_112
, G_119_112, P_119_112, G_125_120, P_125_120, G_127_120, P_127_120, G_9_0, G_11_0, G_13_0, G_15_0, G_25_16
, P_25_16, G_27_16, P_27_16, G_29_16, P_29_16, G_31_16, P_31_16, G_41_32, P_41_32, G_43_32, P_43_32, G_45_32
, P_45_32, G_47_32, P_47_32, G_57_48, P_57_48, G_59_48, P_59_48, G_61_48, P_61_48, G_63_48, P_63_48, G_73_64
, P_73_64, G_75_64, P_75_64, G_77_64, P_77_64, G_79_64, P_79_64, G_89_80, P_89_80, G_91_80, P_91_80
, G_93_80, P_93_80, G_95_80, P_95_80, G_105_96, P_105_96, G_107_96, P_107_96, G_109_96, P_109_96
, G_111_96, P_111_96, G_121_112, P_121_112, G_123_112, P_123_112, G_125_112, P_125_112, G_127_112
, P_127_112, G_17_0, G_19_0, G_21_0, G_23_0, G_25_0, G_27_0, G_29_0, G_31_0, G_49_32, P_49_32, G_51_32
, P_51_32, G_53_32, P_53_32, G_55_32, P_55_32, G_57_32, P_57_32, G_59_32, P_59_32, G_61_32, P_61_32
, G_63_32, P_63_32, G_81_64, P_81_64, G_83_64, P_83_64, G_85_64, P_85_64, G_87_64, P_87_64, G_89_64, P_89_64
, G_91_64, P_91_64, G_93_64, P_93_64, G_95_64, P_95_64, G_113_96, P_113_96, G_115_96, P_115_96
, G_117_96, P_117_96, G_119_96, P_119_96, G_121_96, P_121_96, G_123_96, P_123_96, G_125_96, P_125_96
, G_127_96, P_127_96, G_33_0, G_35_0, G_37_0, G_39_0, G_41_0, G_43_0, G_45_0, G_47_0, G_49_0, G_51_0
, G_53_0, G_55_0, G_57_0, G_59_0, G_61_0, G_63_0, G_97_64, P_97_64, G_99_64, P_99_64, G_101_64, P_101_64
, G_103_64, P_103_64, G_105_64, P_105_64, G_107_64, P_107_64, G_109_64, P_109_64, G_111_64, P_111_64
, G_113_64, P_113_64, G_115_64, P_115_64, G_117_64, P_117_64, G_119_64, P_119_64, G_121_64, P_121_64
, G_123_64, P_123_64, G_125_64, P_125_64, G_127_64, P_127_64, G_65_0, G_67_0, G_69_0, G_71_0, G_73_0
, G_75_0, G_77_0, G_79_0, G_81_0, G_83_0, G_85_0, G_87_0, G_89_0, G_91_0, G_93_0, G_95_0, G_97_0
, G_99_0, G_101_0, G_103_0, G_105_0, G_107_0, G_109_0, G_111_0, G_113_0, G_115_0, G_117_0, G_119_0
, G_121_0, G_123_0, G_125_0, G_127_0, G_2_0, G_4_0, G_6_0, G_8_0, G_10_0, G_12_0, G_14_0, G_16_0
, G_18_0, G_20_0, G_22_0, G_24_0, G_26_0, G_28_0, G_30_0, G_32_0, G_34_0, G_36_0, G_38_0, G_40_0
, G_42_0, G_44_0, G_46_0, G_48_0, G_50_0, G_52_0, G_54_0, G_56_0, G_58_0, G_60_0, G_62_0, G_64_0
, G_66_0, G_68_0, G_70_0, G_72_0, G_74_0, G_76_0, G_78_0, G_80_0, G_82_0, G_84_0, G_86_0, G_88_0
, G_90_0, G_92_0, G_94_0, G_96_0, G_98_0, G_100_0, G_102_0, G_104_0, G_106_0, G_108_0, G_110_0, G_112_0
, G_114_0, G_116_0, G_118_0, G_120_0, G_122_0, G_124_0, G_126_0;
// parallel-prefix, Ladner-Fischer // parallel-prefix, Ladner-Fischer
// Stage 1: Generates G/P pairs that span 1 bits // Stage 1: Generates G/P pairs that span 1 bits

View File

@ -29,6 +29,22 @@ module ladner_fischer64 (c, p, g);
output [64:1] c; output [64:1] c;
logic G_1_0,G_3_2,P_3_2,G_5_4,P_5_4,G_7_6,P_7_6,G_9_8,P_9_8,G_11_10,P_11_10,G_13_12,P_13_12,G_15_14,P_15_14
,G_17_16,P_17_16,G_19_18,P_19_18,G_21_20,P_21_20,G_23_22,P_23_22,G_25_24,P_25_24,G_27_26,P_27_26,G_29_28,P_29_28
,G_31_30,P_31_30,G_33_32,P_33_32,G_35_34,P_35_34,G_37_36,P_37_36,G_39_38,P_39_38,G_41_40,P_41_40,G_43_42,P_43_42
,G_45_44,P_45_44,G_47_46,P_47_46,G_49_48,P_49_48,G_51_50,P_51_50,G_53_52,P_53_52,G_55_54,P_55_54,G_57_56,P_57_56
,G_59_58,P_59_58,G_61_60,P_61_60,G_63_62,P_63_62,G_3_0,G_7_4,P_7_4,G_11_8,P_11_8,G_15_12,P_15_12,G_19_16,P_19_16
,G_23_20,P_23_20,G_27_24,P_27_24,G_31_28,P_31_28,G_35_32,P_35_32,G_39_36,P_39_36,G_43_40,P_43_40,G_47_44,P_47_44
,G_51_48,P_51_48,G_55_52,P_55_52,G_59_56,P_59_56,G_63_60,P_63_60,G_5_0,G_7_0,G_13_8,P_13_8,G_15_8,P_15_8,G_21_16
,P_21_16,G_23_16,P_23_16,G_29_24,P_29_24,G_31_24,P_31_24,G_37_32,P_37_32,G_39_32,P_39_32,G_45_40,P_45_40,G_47_40
,P_47_40,G_53_48,P_53_48,G_55_48,P_55_48,G_61_56,P_61_56,G_63_56,P_63_56,G_9_0,G_11_0,G_13_0,G_15_0,G_25_16
,P_25_16,G_27_16,P_27_16,G_29_16,P_29_16,G_31_16,P_31_16,G_41_32,P_41_32,G_43_32,P_43_32,G_45_32,P_45_32,G_47_32
,P_47_32,G_57_48,P_57_48,G_59_48,P_59_48,G_61_48,P_61_48,G_63_48,P_63_48,G_17_0,G_19_0,G_21_0,G_23_0,G_25_0,G_27_0
,G_29_0,G_31_0,G_49_32,P_49_32,G_51_32,P_51_32,G_53_32,P_53_32,G_55_32,P_55_32,G_57_32,P_57_32,G_59_32,P_59_32
,G_61_32,P_61_32,G_63_32,P_63_32,G_33_0,G_35_0,G_37_0,G_39_0,G_41_0,G_43_0,G_45_0,G_47_0,G_49_0,G_51_0,G_53_0
,G_55_0,G_57_0,G_59_0,G_61_0,G_63_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0,G_14_0,G_16_0,G_18_0,G_20_0,G_22_0
,G_24_0,G_26_0,G_28_0,G_30_0,G_32_0,G_34_0,G_36_0,G_38_0,G_40_0,G_42_0,G_44_0,G_46_0,G_48_0,G_50_0,G_52_0
,G_54_0,G_56_0,G_58_0,G_60_0,G_62_0;
// parallel-prefix, Ladner-Fischer // parallel-prefix, Ladner-Fischer
// Stage 1: Generates G/P pairs that span 1 bits // Stage 1: Generates G/P pairs that span 1 bits

View File

@ -240,6 +240,7 @@ module multiplier( y, x, Sum, Carry );
// Below are the nets for the partial products (booth) // Below are the nets for the partial products (booth)
wire pp_0_0; wire pp_0_0;
wire pp_0_1;
wire pp_0_2; wire pp_0_2;
wire pp_1_2; wire pp_1_2;
wire pp_0_3; wire pp_0_3;

View File

@ -16,17 +16,18 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
wire [26:0][1:0] add1; wire [26:0][1:0] add1;
wire [26:0][54:0] pp; wire [26:0][54:0] pp;
wire [26:0] e; wire [26:0] e;
logic [17:0][105:0] lv1add; logic [106:0] tmpsE;
logic [11:0][105:0] lv2add; logic [17:0][106:0] lv1add;
logic [7:0][105:0] lv3add; logic [11:0][106:0] lv2add;
logic [3:0][105:0] lv4add; logic [7:0][106:0] lv3add;
logic [21:0][106:0] carryTmp; logic [3:0][106:0] lv4add;
wire [26:0][105:0] acc; logic [21:0][107:0] carryTmp;
wire [26:0][106:0] acc;
// wire [105:0] acc // wire [105:0] acc
genvar i; genvar i;
assign xExt = {2'b0,~(xdenormE|xzeroE),xman}; assign xExt = {1'b0,~(xdenormE|xzeroE),xman};
assign yExt = {2'b0,~(ydenormE|yzeroE),yman, 1'b0}; assign yExt = {1'b0,~(ydenormE|yzeroE),yman, 1'b0};
generate generate
for(i=0; i<27; i=i+1) begin for(i=0; i<27; i=i+1) begin
@ -35,69 +36,70 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
endgenerate endgenerate
assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]}; assign acc[0] = {49'b0,~e[0],e[0],e[0],pp[0]};
assign acc[1] = {50'b01,~e[1],pp[1],add1[0]}; assign acc[1] = {49'b01,~e[1],pp[1],add1[0]};
assign acc[2] = {48'b01,~e[2],pp[2],add1[1], 2'b0}; assign acc[2] = {47'b01,~e[2],pp[2],add1[1], 2'b0};
assign acc[3] = {46'b01,~e[3],pp[3],add1[2], 4'b0}; assign acc[3] = {45'b01,~e[3],pp[3],add1[2], 4'b0};
assign acc[4] = {44'b01,~e[4],pp[4],add1[3], 6'b0}; assign acc[4] = {43'b01,~e[4],pp[4],add1[3], 6'b0};
assign acc[5] = {42'b01,~e[5],pp[5],add1[4], 8'b0}; assign acc[5] = {41'b01,~e[5],pp[5],add1[4], 8'b0};
assign acc[6] = {40'b01,~e[6],pp[6],add1[5], 10'b0}; assign acc[6] = {39'b01,~e[6],pp[6],add1[5], 10'b0};
assign acc[7] = {38'b01,~e[7],pp[7],add1[6], 12'b0}; assign acc[7] = {37'b01,~e[7],pp[7],add1[6], 12'b0};
assign acc[8] = {36'b01,~e[8],pp[8],add1[7], 14'b0}; assign acc[8] = {35'b01,~e[8],pp[8],add1[7], 14'b0};
assign acc[9] = {34'b01,~e[9],pp[9],add1[8], 16'b0}; assign acc[9] = {33'b01,~e[9],pp[9],add1[8], 16'b0};
assign acc[10] = {32'b01,~e[10],pp[10],add1[9], 18'b0}; assign acc[10] = {31'b01,~e[10],pp[10],add1[9], 18'b0};
assign acc[11] = {30'b01,~e[11],pp[11],add1[10], 20'b0}; assign acc[11] = {29'b01,~e[11],pp[11],add1[10], 20'b0};
assign acc[12] = {28'b01,~e[12],pp[12],add1[11], 22'b0}; assign acc[12] = {27'b01,~e[12],pp[12],add1[11], 22'b0};
assign acc[13] = {26'b01,~e[13],pp[13],add1[12], 24'b0}; assign acc[13] = {25'b01,~e[13],pp[13],add1[12], 24'b0};
assign acc[14] = {24'b01,~e[14],pp[14],add1[13], 26'b0}; assign acc[14] = {23'b01,~e[14],pp[14],add1[13], 26'b0};
assign acc[15] = {22'b01,~e[15],pp[15],add1[14], 28'b0}; assign acc[15] = {21'b01,~e[15],pp[15],add1[14], 28'b0};
assign acc[16] = {20'b01,~e[16],pp[16],add1[15], 30'b0}; assign acc[16] = {19'b01,~e[16],pp[16],add1[15], 30'b0};
assign acc[17] = {18'b01,~e[17],pp[17],add1[16], 32'b0}; assign acc[17] = {17'b01,~e[17],pp[17],add1[16], 32'b0};
assign acc[18] = {16'b01,~e[18],pp[18],add1[17], 34'b0}; assign acc[18] = {15'b01,~e[18],pp[18],add1[17], 34'b0};
assign acc[19] = {14'b01,~e[19],pp[19],add1[18], 36'b0}; assign acc[19] = {13'b01,~e[19],pp[19],add1[18], 36'b0};
assign acc[20] = {12'b01,~e[20],pp[20],add1[19], 38'b0}; assign acc[20] = {11'b01,~e[20],pp[20],add1[19], 38'b0};
assign acc[21] = {10'b01,~e[21],pp[21],add1[20], 40'b0}; assign acc[21] = {9'b01,~e[21],pp[21],add1[20], 40'b0};
assign acc[22] = {8'b01,~e[22],pp[22],add1[21], 42'b0}; assign acc[22] = {7'b01,~e[22],pp[22],add1[21], 42'b0};
assign acc[23] = {6'b01,~e[23],pp[23],add1[22], 44'b0}; assign acc[23] = {5'b01,~e[23],pp[23],add1[22], 44'b0};
assign acc[24] = {4'b01,~e[24],pp[24],add1[23], 46'b0}; assign acc[24] = {3'b01,~e[24],pp[24],add1[23], 46'b0};
assign acc[25] = {~e[25],pp[25],add1[24], 48'b0}; assign acc[25] = {1'b0, ~e[25],pp[25],add1[24], 48'b0};
assign acc[26] = {pp[26],add1[25], 50'b0}; assign acc[26] = {pp[26],add1[25], 50'b0};
//*** resize adders //*** resize adders
generate generate
for(i=0; i<9; i=i+1) begin for(i=0; i<9; i=i+1) begin
add3comp2 #(.BITS(106)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]), add3comp2 #(.BITS(107)) add1(.a(acc[i*3]), .b(acc[i*3+1]), .c(acc[i*3+2]),
.carry(carryTmp[i][105:0]), .sum(lv1add[i*2+1])); .carry(carryTmp[i][106:0]), .sum(lv1add[i*2+1]));
assign lv1add[i*2] = {carryTmp[i][104:0], 1'b0}; assign lv1add[i*2] = {carryTmp[i][105:0], 1'b0};
end end
endgenerate endgenerate
generate generate
for(i=0; i<6; i=i+1) begin for(i=0; i<6; i=i+1) begin
add3comp2 #(.BITS(106)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]), add3comp2 #(.BITS(107)) add2(.a(lv1add[i*3]), .b(lv1add[i*3+1]), .c(lv1add[i*3+2]),
.carry(carryTmp[i+9][105:0]), .sum(lv2add[i*2+1])); .carry(carryTmp[i+9][106:0]), .sum(lv2add[i*2+1]));
assign lv2add[i*2] = {carryTmp[i+9][104:0], 1'b0}; assign lv2add[i*2] = {carryTmp[i+9][105:0], 1'b0};
end end
endgenerate endgenerate
generate generate
for(i=0; i<4; i=i+1) begin for(i=0; i<4; i=i+1) begin
add3comp2 #(.BITS(106)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]), add3comp2 #(.BITS(107)) add3(.a(lv2add[i*3]), .b(lv2add[i*3+1]), .c(lv2add[i*3+2]),
.carry(carryTmp[i+15][105:0]), .sum(lv3add[i*2+1])); .carry(carryTmp[i+15][106:0]), .sum(lv3add[i*2+1]));
assign lv3add[i*2] = {carryTmp[i+15][104:0], 1'b0}; assign lv3add[i*2] = {carryTmp[i+15][105:0], 1'b0};
end end
endgenerate endgenerate
generate generate
for(i=0; i<2; i=i+1) begin for(i=0; i<2; i=i+1) begin
add4comp2 #(.BITS(106)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]), add4comp2 #(.BITS(107)) add4(.a(lv3add[i*4]), .b(lv3add[i*4+1]), .c(lv3add[i*4+2]), .d(lv3add[i*4+3]),
.carry(carryTmp[i+19]), .sum(lv4add[i*2+1])); .carry(carryTmp[i+19]), .sum(lv4add[i*2+1]));
assign lv4add[i*2] = {carryTmp[i+19][104:0], 1'b0}; assign lv4add[i*2] = {carryTmp[i+19][105:0], 1'b0};
end end
endgenerate endgenerate
add4comp2 #(.BITS(106)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) , add4comp2 #(.BITS(107)) add5(.a(lv4add[0]), .b(lv4add[1]), .c(lv4add[2]), .d(lv4add[3]) ,
.carry(carryTmp[21]), .sum(sE)); .carry(carryTmp[21]), .sum(tmpsE));
assign sE = tmpsE[105:0];
assign rE = {carryTmp[21][104:0], 1'b0}; assign rE = {carryTmp[21][104:0], 1'b0};
// assign rE = 0; // assign rE = 0;
// assign sE = acc[0] + // assign sE = acc[0] +
@ -131,3 +133,4 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE);
// assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman}; // assign sE = {53'b0,~(xdenormE|xzeroE),xman} * {53'b0,~(ydenormE|yzeroE),yman};
// assign rE = 0; // assign rE = 0;
endmodule endmodule

View File

@ -56,8 +56,8 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// The sticky bit calculation is actually built into the shifter and // The sticky bit calculation is actually built into the shifter and
// does not require a true subtraction shown in the model. // does not require a true subtraction shown in the model.
assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2; assign isShiftLeft1 = (aligncntM == 13'b1 ||aligncntM == 13'b0 || $signed(aligncntM) == $signed(-(13'b1)))&& zexp == 11'h2;
assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022)); // assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022));
always_comb always_comb
begin begin
// d = aligncntM // d = aligncntM
@ -65,19 +65,19 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// p = 53 // p = 53
// ea + eb = aeM // ea + eb = aeM
// set d<=2 to d<=0 // set d<=2 to d<=0
if ($signed(aligncntM)<=$signed(2)) begin //d<=2 if ($signed(aligncntM)<=$signed(13'd2)) begin //d<=2
// product anchored or cancellation // product anchored or cancellation
if ($signed(aeM-normcnt+2) >= $signed(-1022)) begin //ea+eb-l+2 >= emin if ($signed(aeM-{{4{normcnt[8]}},normcnt}+13'd2) >= $signed(-(13'd1022))) begin //ea+eb-l+2 >= emin
//normal result //normal result
de0 = xzeroM|yzeroM ? zexp : aeM-normcnt+xdenormM+ydenormM+57; de0 = xzeroM|yzeroM ? {2'b0,zexp} : aeM-{{4{normcnt[8]}},normcnt}+{12'b0,xdenormM}+{12'b0,ydenormM}+13'd57;
resultdenorm = |sum & ~|de0 | de0[12]; resultdenorm = |sum & ~|de0 | de0[12];
// if z is zero then there was a 56 bit shift of the product // if z is zero then there was a 56 bit shift of the product
sumshifted = resultdenorm ? sum << sumshiftM-zzeroM+isShiftLeft1 : sum << normcnt; // p+2+l sumshifted = resultdenorm ? sum << sumshiftM-{8'b0,zzeroM}+{8'b0,isShiftLeft1} : sum << normcnt; // p+2+l
v = sumshifted[162:109]; v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bsM; sticky = (|sumshifted[108:0]) | bsM;
//de0 = aeM-normcnt+2-1023; //de0 = aeM-normcnt+2-1023;
end else begin end else begin
sumshifted = sum << (1080+aeM); sumshifted = sum << (13'd1080+aeM);
v = sumshifted[162:109]; v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bsM; sticky = (|sumshifted[108:0]) | bsM;
resultdenorm = 1; resultdenorm = 1;
@ -96,29 +96,29 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
// the book says exp = zexp + {-1,0,1} // the book says exp = zexp + {-1,0,1}
if(sumshiftzeroM) begin if(sumshiftzeroM) begin
v = sum[162:109]; v = sum[162:109];
sticky = sum[108:0] | bsM; sticky = (|sum[108:0]) | bsM;
de0 = zexp; de0 = {2'b0,zexp};
end else if(sumshifted[163] & ~sumshifttmp[9])begin end else if(sumshifted[163] & ~sumshifttmp[9])begin
v = sumshifted[162:109]; v = sumshifted[162:109];
sticky = (|sumshifted[108:0]) | bsM; sticky = (|sumshifted[108:0]) | bsM;
de0 = zexp +2; de0 = {2'b0,zexp} +13'd2;
end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin end else if ((sumshifttmp[9] & sumshiftM[0]) || sumshifted[162]) begin
v = sumshifted[161:108]; v = sumshifted[161:108];
sticky = (|sumshifted[107:0]) | bsM; sticky = (|sumshifted[107:0]) | bsM;
de0 = zexp+1; de0 = {2'b0,zexp}+13'd1;
end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin end else if (sumshifted[161] || (sumshifttmp[9] & sumshiftM[1])) begin
v = sumshifted[160:107]; v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bsM; sticky = (|sumshifted[106:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
de0 = zexp+zdenormM; de0 = {2'b0,zexp}+{12'b0,zdenormM};
end else if(sumshifted[160]& ~zdenormM) begin end else if(sumshifted[160]& ~zdenormM) begin
de0 = zexp-1; de0 = {2'b0,zexp}-13'b1;
v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106]; v = ~|de0&~sumzero ? sumshifted[160:107] : sumshifted[159:106];
sticky = (|sumshifted[105:0]) | bsM; sticky = (|sumshifted[105:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
end else if(sumshifted[159]& ~zdenormM) begin end else if(sumshifted[159]& ~zdenormM) begin
//v = sumshifted[158:105]; //v = sumshifted[158:105];
de0 = zexp-2; de0 = {2'b0,zexp}-13'd2;
v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105]; v = (~|de0 | de0[12])&~sumzero ? sumshifted[161:108] : sumshifted[158:105];
sticky = (|sumshifted[104:0]) | bsM; sticky = (|sumshifted[104:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
@ -126,7 +126,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
v = sumshifted[160:107]; v = sumshifted[160:107];
sticky = (|sumshifted[106:0]) | bsM; sticky = (|sumshifted[106:0]) | bsM;
//de0 = zexp-1; //de0 = zexp-1;
de0 = zexp; de0 = {{2{zexp[62]}},zexp};
end else begin end else begin
de0 = 0; de0 = 0;
sumshifted = sum << sumshiftM-1; // p+2+l sumshifted = sum << sumshiftM-1; // p+2+l
@ -144,3 +144,4 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5;
endmodule endmodule

View File

@ -4,7 +4,7 @@
// Date: 11/2/1995 // Date: 11/2/1995
// //
// Block Description: // Block Description:
// This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay. // This block is responsible for rounding the normalized result of // the FMAC. Because prenormalized results may be bypassed back to // the FMAC X and z inputs, rounding does not appear in the critical // path of most floating point code. This is good because rounding // requires an entire 52 bit carry-propagate half-adder delay.
// //
// The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also // The results from other FPU blocks (e.g. FCVT, FDIV, etc) are also
// muxed in to form the actual result for register file writeback. This // muxed in to form the actual result for register file writeback. This
@ -24,14 +24,14 @@ module round(v, sticky, FrmM, wsign,
input logic [2:0] FrmM; input logic [2:0] FrmM;
input logic wsign; // Sign of result input logic wsign; // Sign of result
input logic [4:0] FmaFlagsM; input logic [4:0] FmaFlagsM;
input logic inf; // Some input is infinity input logic inf; // Some input logic is infinity
input logic nanM; // Some input is NaN input logic nanM; // Some input logic is NaN
input logic xnanM; // X is NaN input logic xnanM; // X is NaN
input logic ynanM; // Y is NaN input logic ynanM; // Y is NaN
input logic znanM; // Z is NaN input logic znanM; // Z is NaN
input logic [51:0] xman; // Input X input logic [51:0] xman; // input logic X
input logic [51:0] yman; // Input Y input logic [51:0] yman; // input logic Y
input logic [51:0] zman; // Input Z input logic [51:0] zman; // input logic Z
output logic [51:0] wman; // rounded result of FMAC output logic [51:0] wman; // rounded result of FMAC
output logic infinity; // Generate infinity on overflow output logic infinity; // Generate infinity on overflow
output logic specialsel; // Select special result output logic specialsel; // Select special result
@ -85,7 +85,7 @@ module round(v, sticky, FrmM, wsign,
// The special result mux is a 4:1 mux that should not appear in the // The special result mux is a 4:1 mux that should not appear in the
// critical path of the machine. It is not priority encoded, despite // critical path of the machine. It is not priority encoded, despite
// the code below suggesting otherwise. Also, several of the identical data // the code below suggesting otherwise. Also, several of the identical data
// inputs to the wide muxes can be combined at the expense of more // inputs to the wide muxes can be combined at the expense of more
// complicated non-critical control in the circuit implementation. // complicated non-critical control in the circuit implementation.
assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid assign specialsel = FmaFlagsM[2] || FmaFlagsM[1] || FmaFlagsM[4] || //overflow underflow invalid
@ -102,15 +102,15 @@ module round(v, sticky, FrmM, wsign,
assign infinityres = infinity ? 52'b0 : {52{1'b1}}; assign infinityres = infinity ? 52'b0 : {52{1'b1}};
// Invalid operations produce a quiet NaN. The result should // Invalid operations produce a quiet NaN. The result should
// propagate an input if the input is NaN. Since we assume all // propagate an input if the input is NaN. Since we assume all
// NaN inputs are already quiet, we don't have to force them quiet. // NaN inputs are already quiet, we don't have to force them quiet.
// assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original // assign nanres = xnanM ? x: (ynanM ? y : (znanM ? z : {1'b1, 51'b0})); // original
// IEEE 754-2008 section 6.2.3 states: // IEEE 754-2008 section 6.2.3 states:
// "If two or more inputs are NaN, then the payload of the resulting NaN should be // "If two or more inputs are NaN, then the payload of the resulting NaN should be
// identical to the payload of one of the input NaNs if representable in the destination // identical to the payload of one of the input NaNs if representable in the destination
// format. This standard does not specify which of the input NaNs will provide the payload." // format. This standard does not specify which of the input NaNs will provide the payload."
assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet assign nanres = xnanM ? {1'b1, xman[50:0]}: (ynanM ? {1'b1, yman[50:0]} : (znanM ? {1'b1, zman[50:0]} : {1'b1, 51'b0}));// KEP 210112 add the 1 to make NaNs quiet
// Select result with 4:1 mux // Select result with 4:1 mux

View File

@ -238,7 +238,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) ( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) )
: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) : ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) )
) : ) :
(op_type[3]) ? exp_A_unmodified : Rexp; (op_type[3]) ? exp_A_unmodified[10:0] : Rexp; //KEP used to be all of exp_A_unmodified
// If the result is zero or infinity, the mantissa is all zeros. // If the result is zero or infinity, the mantissa is all zeros.
// If the result is NaN, the mantissa is 10...0 // If the result is NaN, the mantissa is 10...0

View File

@ -67,6 +67,7 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
wire sign_rem; wire sign_rem;
wire [63:0] q, qm, qp; wire [63:0] q, qm, qp;
wire exp_ovf, exp_ovfSP, exp_ovfDP; wire exp_ovf, exp_ovfSP, exp_ovfDP;
logic zero_rem;
// Remainder = 0? // Remainder = 0?
assign zero_rem = ~(|regr_out); assign zero_rem = ~(|regr_out);
@ -97,7 +98,7 @@ module rounder_div (Result, DenormIO, Flags, rm, P, OvEn,
// 1.) we choose any qm0, qp0, q0 (since we shift mant) // 1.) we choose any qm0, qp0, q0 (since we shift mant)
// 2.) we choose qp and we overflow (for RU) // 2.) we choose qp and we overflow (for RU)
assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})}; assign exp_ovf = |{qp[62:40], (qp[39:11] & {29{~P}})};
assign Texp = exp_diff - {{13{vss}}, ~q1[63]} + {{13{vss}}, mux_mant[1]&qp1[63]&~exp_ovf}; assign Texp = exp_diff - {{12{vss}}, ~q1[63]} + {{12{vss}}, mux_mant[1]&qp1[63]&~exp_ovf}; // KEP used to be 13{vss}
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are // Overflow only occurs for double precision, if Texp[10] to Texp[0] are
// all ones. To encourage sharing with single precision overflow detection, // all ones. To encourage sharing with single precision overflow detection,

View File

@ -13,6 +13,7 @@ module sbtm2 (input logic [11:0] a, output logic [10:0] y);
logic [14:0] op1; logic [14:0] op1;
logic [14:0] op2; logic [14:0] op2;
logic [14:0] p; logic [14:0] p;
logic cout;
assign x0 = a[11:7]; assign x0 = a[11:7];
assign x1 = a[6:4]; assign x1 = a[6:4];

View File

@ -25,7 +25,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
input logic [4:0] FmaFlagsM; // Round toward minus infinity input logic [4:0] FmaFlagsM; // Round toward minus infinity
input logic sumzero; // Sum = O input logic sumzero; // Sum = O
input logic zinfM; // Y = Inf input logic zinfM; // Y = Inf
input logic inf; // Some input = Inf input logic inf; // Some input logic = Inf
output logic wsign; // Sign of W output logic wsign; // Sign of W
output logic invz; // Invert addend into adder output logic invz; // Invert addend into adder
output logic negsum; // Negate result of adder output logic negsum; // Negate result of adder
@ -36,6 +36,9 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
wire zerosign; // sign if result= 0 wire zerosign; // sign if result= 0
wire sumneg; // sign if result= 0 wire sumneg; // sign if result= 0
wire infsign; // sign if result= Inf wire infsign; // sign if result= Inf
logic tmp;
logic psign;
// Compute sign of product // Compute sign of product
assign psign = xsign ^ ysign; assign psign = xsign ^ ysign;
@ -55,7 +58,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign); assign sumneg = invz&zsign&negsum1 | invz&psign&~negsum1 | (zsign&psign);
//always @(invz or negsum0 or negsum1 or bsM or ps) //always @(invz or negsum0 or negsum1 or bsM or ps)
// begin // begin
// if (~invz) begin // both inputs have same sign // if (~invz) begin // both inputs have same sign
// negsum = 0; // negsum = 0;
// selsum1 = 0; // selsum1 = 0;
// end else if (bsM) begin // sticky bit set on addend // end else if (bsM) begin // sticky bit set on addend
@ -80,7 +83,7 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM,
// Sign calculation is not in the critical path so the cases // Sign calculation is not in the critical path so the cases
// can be tolerated. // can be tolerated.
// IEEE 754-2008 section 6.3 states // IEEE 754-2008 section 6.3 states
// "When either an input or result is NaN, this standard does not interpret the sign of a NaN." // "When either an input or result is NaN, this standard does not interpret the sign of a NaN."
// also pertaining to negZero it states: // also pertaining to negZero it states:
// "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference // "When the sum/difference of two operands with opposite signs is exactly zero, the sign of that sum/difference
// shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero // shall be +0 in all rounding attributes EXCEPT roundTowardNegative. Under that attribute, the sign of an exact zero

View File

@ -60,7 +60,7 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE,
// assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE; // assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE;
// assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE; // assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE;
// assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE; // assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE;
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number // KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
assign xzeroE = ~(|ReadData1E[62:0]); assign xzeroE = ~(|ReadData1E[62:0]);
assign yzeroE = ~(|ReadData2E[62:0]); assign yzeroE = ~(|ReadData2E[62:0]);
assign zzeroE = ~(|ReadData3E[62:0]); assign zzeroE = ~(|ReadData3E[62:0]);

View File

@ -28,7 +28,7 @@
module privdec ( module privdec (
input logic [31:20] InstrM, input logic [31:20] InstrM,
input logic PrivilegedM, IllegalIEUInstrFaultM, IllegalCSRAccessM, input logic PrivilegedM, IllegalIEUInstrFaultM, IllegalCSRAccessM, IllegalFPUInstrM,
input logic [1:0] PrivilegeModeW, input logic [1:0] PrivilegeModeW,
input logic STATUS_TSR, input logic STATUS_TSR,
output logic IllegalInstrFaultM, output logic IllegalInstrFaultM,
@ -47,7 +47,7 @@ module privdec (
assign wfiM = PrivilegedM & (InstrM[31:20] == 12'b000100000101); assign wfiM = PrivilegedM & (InstrM[31:20] == 12'b000100000101);
assign sfencevmaM = PrivilegedM & (InstrM[31:25] == 7'b0001001); assign sfencevmaM = PrivilegedM & (InstrM[31:25] == 7'b0001001);
assign IllegalPrivilegedInstrM = PrivilegedM & ~(uretM|sretM|mretM|ecallM|ebreakM|wfiM|sfencevmaM); assign IllegalPrivilegedInstrM = PrivilegedM & ~(uretM|sretM|mretM|ecallM|ebreakM|wfiM|sfencevmaM);
assign IllegalInstrFaultM = IllegalIEUInstrFaultM | IllegalPrivilegedInstrM | IllegalCSRAccessM; // *** generalize this for other instructions assign IllegalInstrFaultM = (IllegalIEUInstrFaultM & IllegalFPUInstrM) | IllegalPrivilegedInstrM | IllegalCSRAccessM | IllegalFPUInstrM; // *** generalize this for other instructions
// *** initially, wfi and sfencevma are nop // *** initially, wfi and sfencevma are nop
// *** zfenci extension? // *** zfenci extension?

View File

@ -46,7 +46,7 @@ module privileged (
input logic PrivilegedM, input logic PrivilegedM,
input logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM, input logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM,
input logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM, input logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM,
input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD, input logic InstrMisalignedFaultM, IllegalIEUInstrFaultD, IllegalFPUInstrD,
input logic LoadMisalignedFaultM, input logic LoadMisalignedFaultM,
input logic StoreMisalignedFaultM, input logic StoreMisalignedFaultM,
input logic TimerIntM, ExtIntM, SwIntM, input logic TimerIntM, ExtIntM, SwIntM,
@ -78,6 +78,7 @@ module privileged (
logic uretM, sretM, mretM, ecallM, ebreakM, wfiM, sfencevmaM; logic uretM, sretM, mretM, ecallM, ebreakM, wfiM, sfencevmaM;
logic IllegalCSRAccessM; logic IllegalCSRAccessM;
logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM; logic IllegalIEUInstrFaultE, IllegalIEUInstrFaultM;
logic IllegalFPUInstrE, IllegalFPUInstrM;
logic LoadPageFaultM, StorePageFaultM; logic LoadPageFaultM, StorePageFaultM;
logic InstrPageFaultF, InstrPageFaultD, InstrPageFaultE, InstrPageFaultM; logic InstrPageFaultF, InstrPageFaultD, InstrPageFaultE, InstrPageFaultM;
logic InstrAccessFaultF, InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM; logic InstrAccessFaultF, InstrAccessFaultD, InstrAccessFaultE, InstrAccessFaultM;
@ -158,12 +159,12 @@ module privileged (
flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD, flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD,
{InstrPageFaultF, InstrAccessFaultF}, {InstrPageFaultF, InstrAccessFaultF},
{InstrPageFaultD, InstrAccessFaultD}); {InstrPageFaultD, InstrAccessFaultD});
flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE, flopenrc #(4) faultregE(clk, reset, FlushE, ~StallE,
{IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD}, // ** vs IllegalInstrFaultInD {IllegalIEUInstrFaultD, InstrPageFaultD, InstrAccessFaultD, IllegalFPUInstrD}, // ** vs IllegalInstrFaultInD
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE}); {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE});
flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM, flopenrc #(4) faultregM(clk, reset, FlushM, ~StallM,
{IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE}, {IllegalIEUInstrFaultE, InstrPageFaultE, InstrAccessFaultE, IllegalFPUInstrE},
{IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM}); {IllegalIEUInstrFaultM, InstrPageFaultM, InstrAccessFaultM, IllegalFPUInstrM});
trap trap(.*); trap trap(.*);

View File

@ -96,7 +96,7 @@ module wallypipelinedhart (
logic SquashSCW; logic SquashSCW;
logic [31:0] FSROutW; logic [31:0] FSROutW;
logic DivSqrtDoneE; logic DivSqrtDoneE;
logic FInvalInstrD; logic IllegalFPUInstrD;
logic [`XLEN-1:0] FPUResultW; logic [`XLEN-1:0] FPUResultW;
// memory management unit signals // memory management unit signals
@ -174,7 +174,7 @@ module wallypipelinedhart (
privileged priv(.*); privileged priv(.*);
// fpu fpu(.*); // floating point unit fpu fpu(.*); // floating point unit
// add FPU here, with SetFflagsM, FRM_REGW // add FPU here, with SetFflagsM, FRM_REGW
// presently stub out SetFlagsM and FloatRegWriteW // presently stub out SetFlagsM and FloatRegWriteW
//assign SetFflagsM = 0; //assign SetFflagsM = 0;

View File

@ -416,18 +416,6 @@ module testbench();
`CHECK_CSR2(STVAL, `CSRS) `CHECK_CSR2(STVAL, `CSRS)
`CHECK_CSR(STVEC) `CHECK_CSR(STVEC)
//$stop;
generate
if (`BUSYBEAR == 1) begin
initial begin //this is temporary until the bug can be fixed!!!
#11130100;
force dut.hart.ieu.dp.regf.rf[5] = 64'h0000000080000004;
#100;
release dut.hart.ieu.dp.regf.rf[5];
end
end
endgenerate
logic speculative; logic speculative;
initial begin initial begin
speculative = 0; speculative = 0;

View File

@ -52,7 +52,71 @@ module testbench();
string tests64f[] = '{ string tests64f[] = '{
"rv64f/I-FADD-S-01", "2000", "rv64f/I-FADD-S-01", "2000",
"rv64f/I-FCLASS-S-01", "2000" "rv64f/I-FCLASS-S-01", "2000",
"rv64f/I-FCVT-S-L-01", "2000",
"rv64f/I-FCVT-S-LU-01", "2000",
"rv64f/I-FCVT-S-W-01", "2000",
"rv64f/I-FCVT-S-WU-01", "2000",
"rv64f/I-FCVT-L-S-01", "2000",
"rv64f/I-FCVT-LU-S-01", "2000",
"rv64f/I-FCVT-W-S-01", "2000",
"rv64f/I-FCVT-WU-S-01", "2000",
"rv64f/I-FDIV-S-01", "2000",
"rv64f/I-FEQ-S-01", "2000",
"rv64f/I-FLE-S-01", "2000",
"rv64f/I-FLT-S-01", "2000",
"rv64f/I-FMADD-S-01", "2000",
"rv64f/I-FMAX-S-01", "2000",
"rv64f/I-FMIN-S-01", "2000",
"rv64f/I-FMSUB-S-01", "2000",
"rv64f/I-FMUL-S-01", "2000",
"rv64f/I-FMV-W-X-01", "2000",
"rv64f/I-FMV-X-W-01", "2000",
"rv64f/I-FNMADD-S-01", "2000",
"rv64f/I-FNMSUB-S-01", "2000",
"rv64f/I-FSGNJ-S-01", "2000",
"rv64f/I-FSGNJN-S-01", "2000",
"rv64f/I-FSGNJX-S-01", "2000",
"rv64f/I-FSQRT-S-01", "2000",
"rv64f/I-FSW-01", "2000",
"rv64f/I-FLW-01", "2110",
"rv64f/I-FSUB-S-01", "2000"
};
// tests64d: flat list of strings laid out as pairs -- a test name under
// rv64d/ followed by a hex-looking string ("2000" for every entry except
// I-FLD-D-01, which uses "2420").
// NOTE(review): the second element of each pair is presumably an address
// consumed by the testbench when it walks this array -- confirm against
// the code that reads it.
// NOTE(review): "I-FSQRTD-01" lacks the hyphen before the D that the other
// -D tests in this list use, and "I-FLD-D-01" is named differently from
// its store counterpart "I-FSD-01"; confirm both against the file names
// on disk.
string tests64d[] = '{
"rv64d/I-FADD-D-01", "2000",
"rv64d/I-FCLASS-D-01", "2000",
"rv64d/I-FCVT-D-L-01", "2000",
"rv64d/I-FCVT-D-LU-01", "2000",
"rv64d/I-FCVT-D-S-01", "2000",
"rv64d/I-FCVT-D-W-01", "2000",
"rv64d/I-FCVT-D-WU-01", "2000",
"rv64d/I-FCVT-L-D-01", "2000",
"rv64d/I-FCVT-LU-D-01", "2000",
"rv64d/I-FCVT-S-D-01", "2000",
"rv64d/I-FCVT-W-D-01", "2000",
"rv64d/I-FCVT-WU-D-01", "2000",
"rv64d/I-FDIV-D-01", "2000",
"rv64d/I-FEQ-D-01", "2000",
"rv64d/I-FLD-D-01", "2420",
"rv64d/I-FLE-D-01", "2000",
"rv64d/I-FLT-D-01", "2000",
"rv64d/I-FMADD-D-01", "2000",
"rv64d/I-FMAX-D-01", "2000",
"rv64d/I-FMIN-D-01", "2000",
"rv64d/I-FMSUB-D-01", "2000",
"rv64d/I-FMUL-D-01", "2000",
"rv64d/I-FMV-D-X-01", "2000",
"rv64d/I-FMV-X-D-01", "2000",
"rv64d/I-FNMADD-D-01", "2000",
"rv64d/I-FNMSUB-D-01", "2000",
"rv64d/I-FSD-01", "2000",
"rv64d/I-FSGNJ-D-01", "2000",
"rv64d/I-FSGNJN-D-01", "2000",
"rv64d/I-FSGNJX-D-01", "2000",
"rv64d/I-FSQRTD-01", "2000",
"rv64d/I-FSUB-D-01", "2000"
}; };
string tests64a[] = '{ string tests64a[] = '{
@ -259,6 +323,40 @@ module testbench();
"rv32i/I-MISALIGN_JMP-01","2000" "rv32i/I-MISALIGN_JMP-01","2000"
}; };
// Test list for the rv32f/ directory: entries alternate between a test path
// and a second string ("2000" for every test here except I-FLW-01, "2110").
// NOTE(review): the second string is presumably an address consumed by the
// code that iterates this array -- confirm against that loop.
string tests32f[] = '{
"rv32f/I-FADD-S-01", "2000",
"rv32f/I-FCLASS-S-01", "2000",
// NOTE(review): FCVT.S.L/LU and FCVT.L/LU.S are 64-bit integer conversions,
// which the RISC-V spec defines only for RV64 -- confirm that the four
// *-L-* / *-LU-* test files below actually exist under rv32f/.
"rv32f/I-FCVT-S-L-01", "2000",
"rv32f/I-FCVT-S-LU-01", "2000",
"rv32f/I-FCVT-S-W-01", "2000",
"rv32f/I-FCVT-S-WU-01", "2000",
"rv32f/I-FCVT-L-S-01", "2000",
"rv32f/I-FCVT-LU-S-01", "2000",
"rv32f/I-FCVT-W-S-01", "2000",
"rv32f/I-FCVT-WU-S-01", "2000",
"rv32f/I-FDIV-S-01", "2000",
"rv32f/I-FEQ-S-01", "2000",
"rv32f/I-FLE-S-01", "2000",
"rv32f/I-FLT-S-01", "2000",
"rv32f/I-FMADD-S-01", "2000",
"rv32f/I-FMAX-S-01", "2000",
"rv32f/I-FMIN-S-01", "2000",
"rv32f/I-FMSUB-S-01", "2000",
"rv32f/I-FMUL-S-01", "2000",
"rv32f/I-FMV-W-X-01", "2000",
"rv32f/I-FMV-X-W-01", "2000",
"rv32f/I-FNMADD-S-01", "2000",
"rv32f/I-FNMSUB-S-01", "2000",
"rv32f/I-FSGNJ-S-01", "2000",
"rv32f/I-FSGNJN-S-01", "2000",
"rv32f/I-FSGNJX-S-01", "2000",
"rv32f/I-FSQRT-S-01", "2000",
"rv32f/I-FSW-01", "2000",
"rv32f/I-FLW-01", "2110",
"rv32f/I-FSUB-S-01", "2000"
};
string tests32i[] = { string tests32i[] = {
"rv32i/I-ADD-01", "2000", "rv32i/I-ADD-01", "2000",
"rv32i/I-ADDI-01","2000", "rv32i/I-ADDI-01","2000",
@ -617,11 +715,13 @@ module instrNameDecTB(
logic [2:0] funct3; logic [2:0] funct3;
logic [6:0] funct7; logic [6:0] funct7;
logic [11:0] imm; logic [11:0] imm;
logic [4:0] rs2;
assign op = instr[6:0]; assign op = instr[6:0];
assign funct3 = instr[14:12]; assign funct3 = instr[14:12];
assign funct7 = instr[31:25]; assign funct7 = instr[31:25];
assign imm = instr[31:20]; assign imm = instr[31:20];
assign rs2 = instr[24:20];
// it would be nice to add the operands to the name // it would be nice to add the operands to the name
// create another variable called decoded // create another variable called decoded
@ -745,6 +845,67 @@ module instrNameDecTB(
else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D"; else if (funct7[6:2] == 5'b11100) name = "AMOMAXU.D";
else name = "ILLEGAL"; else name = "ILLEGAL";
10'b0001111_???: name = "FENCE"; 10'b0001111_???: name = "FENCE";
// Fused multiply-add family: matched on the upper 7 pattern bits only (the low
// 3 bits are don't-care). The name carries no precision suffix.
// NOTE(review): the 10-bit selector is presumably {op, funct3} -- confirm
// against the case header.
10'b1000011_???: name = "FMADD";
10'b1000111_???: name = "FMSUB";
10'b1001011_???: name = "FNMSUB";
10'b1001111_???: name = "FNMADD";
// OP-FP pattern (upper bits 1010011) with low 3 bits = 000.
// funct7[6:2] selects the operation; the FCVT and FMV forms additionally
// check the full funct7 and the rs2 field. Anything else is "ILLEGAL".
// NOTE(review): the 10-bit selector is presumably {op, funct3} -- confirm
// against the case header.
10'b1010011_000: if (funct7[6:2] == 5'b00000) name = "FADD";
else if (funct7[6:2] == 5'b00001) name = "FSUB";
else if (funct7[6:2] == 5'b00010) name = "FMUL";
else if (funct7[6:2] == 5'b00011) name = "FDIV";
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else if (funct7 == 7'b1110000 && rs2 == 5'b00000) name = "FMV.X.W";
else if (funct7 == 7'b1111000 && rs2 == 5'b00000) name = "FMV.W.X";
// funct7[1:0] = 01 is the double-precision format, so these two encodings are
// FMV.X.D / FMV.D.X (previously mislabelled with the single-precision names).
else if (funct7 == 7'b1110001 && rs2 == 5'b00000) name = "FMV.X.D";
else if (funct7 == 7'b1111001 && rs2 == 5'b00000) name = "FMV.D.X";
else if (funct7[6:2] == 5'b00100) name = "FSGNJ";
else if (funct7[6:2] == 5'b00101) name = "FMIN";
else if (funct7[6:2] == 5'b10100) name = "FLE";
else name = "ILLEGAL";
// OP-FP pattern (upper bits 1010011) with low 3 bits = 001.
// funct7[6:2] selects the operation; the FCVT forms additionally check the
// full funct7 and the rs2 field. With these low bits, funct7[6:2] values
// 00100 / 00101 / 10100 / 11100 decode to FSGNJN / FMAX / FLT / FCLASS.
// NOTE(review): the 10-bit selector is presumably {op, funct3} -- confirm
// against the case header.
10'b1010011_001: if (funct7[6:2] == 5'b00000) name = "FADD";
else if (funct7[6:2] == 5'b00001) name = "FSUB";
else if (funct7[6:2] == 5'b00010) name = "FMUL";
else if (funct7[6:2] == 5'b00011) name = "FDIV";
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else if (funct7[6:2] == 5'b00100) name = "FSGNJN";
else if (funct7[6:2] == 5'b00101) name = "FMAX";
else if (funct7[6:2] == 5'b10100) name = "FLT";
else if (funct7[6:2] == 5'b11100) name = "FCLASS";
else name = "ILLEGAL";
// OP-FP pattern (upper bits 1010011) with low 3 bits = 010.
// The pattern previously used 0101111, which is the AMO opcode rather than
// OP-FP, so FSGNJX and FEQ could never be decoded from a floating-point
// instruction and fell through to the generic OP-FP arm as "ILLEGAL".
// funct7[6:2] selects the operation; the FCVT forms additionally check the
// full funct7 and the rs2 field.
// NOTE(review): the 10-bit selector is presumably {op, funct3} -- confirm
// against the case header.
10'b1010011_010: if (funct7[6:2] == 5'b00000) name = "FADD";
else if (funct7[6:2] == 5'b00001) name = "FSUB";
else if (funct7[6:2] == 5'b00010) name = "FMUL";
else if (funct7[6:2] == 5'b00011) name = "FDIV";
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else if (funct7[6:2] == 5'b00100) name = "FSGNJX";
else if (funct7[6:2] == 5'b10100) name = "FEQ";
else name = "ILLEGAL";
// Catch-all OP-FP arm (upper bits 1010011, low 3 bits don't-care). It decodes
// only the arithmetic and FCVT forms; every other funct7 value is reported as
// "ILLEGAL".
// NOTE(review): this relies on the more specific 1010011_xxx arms appearing
// earlier in the case so that they take priority -- confirm the arm order.
10'b1010011_???: if (funct7[6:2] == 5'b00000) name = "FADD";
else if (funct7[6:2] == 5'b00001) name = "FSUB";
else if (funct7[6:2] == 5'b00010) name = "FMUL";
else if (funct7[6:2] == 5'b00011) name = "FDIV";
else if (funct7[6:2] == 5'b01011) name = "FSQRT";
else if (funct7 == 7'b1100000 && rs2 == 5'b00000) name = "FCVT.W.S";
else if (funct7 == 7'b1100000 && rs2 == 5'b00001) name = "FCVT.WU.S";
else if (funct7 == 7'b1101000 && rs2 == 5'b00000) name = "FCVT.S.W";
else if (funct7 == 7'b1101000 && rs2 == 5'b00001) name = "FCVT.S.WU";
else name = "ILLEGAL";
// Floating-point loads (upper bits 0000111) and stores (upper bits 0100111).
// The low 3 bits give the access width: 010 = 32-bit (FLW/FSW) and
// 011 = 64-bit (FLD/FSD). The FLD/FSD arms previously repeated the 010
// patterns, which made them unreachable duplicates of FLW/FSW.
10'b0000111_010: name = "FLW";
10'b0100111_010: name = "FSW";
10'b0000111_011: name = "FLD";
10'b0100111_011: name = "FSD";
default: name = "ILLEGAL"; default: name = "ILLEGAL";
endcase endcase
endmodule endmodule