diff --git a/wally-pipelined/src/fpu/add.sv b/wally-pipelined/src/fpu/add.sv index f2b52424..990c5ebe 100644 --- a/wally-pipelined/src/fpu/add.sv +++ b/wally-pipelined/src/fpu/add.sv @@ -15,16 +15,16 @@ module add(rM, sM, tM, sum, negsum, invz, selsum1, negsum0, negsum1, killprodM); //////////////////////////////////////////////////////////////////////////////// - input [105:0] rM; // partial product 1 - input [105:0] sM; // partial product 2 - input [163:0] tM; // aligned addend - input invz; // invert addend - input selsum1; // select +1 mode of compound adder - input killprodM; // z >> product - input negsum; // Negate sum - output [163:0] sum; // sum - output negsum0; // sum was negative in +0 mode - output negsum1; // sum was negative in +1 mode + input logic [105:0] rM; // partial product 1 + input logic [105:0] sM; // partial product 2 + input logic [163:0] tM; // aligned addend + input logic invz; // invert addend + input logic selsum1; // select +1 mode of compound adder + input logic killprodM; // z >> product + input logic negsum; // Negate sum + output logic [163:0] sum; // sum + output logic negsum0; // sum was negative in +0 mode + output logic negsum1; // sum was negative in +1 mode // Internal nodes @@ -44,11 +44,12 @@ module add(rM, sM, tM, sum, assign r2 = killprodM ? 106'b0 : rM; assign s2 = killprodM ? 106'b0 : sM; + //replace this with a more structural cpa that synthisises better // Compound adder // Consists of 3:2 CSA followed by long compound CPA - assign prodshifted = killprodM ? 0 : {56'b0, r2+s2, 2'b0}; - assign sum0 = {1'b0,prodshifted} + t2 + 158'b0; - assign sum1 = {1'b0,prodshifted} + t2 + 158'b1; // +1 from invert of z above + // assign prodshifted = killprodM ? 
0 : {56'b0, r2+s2, 2'b0}; + assign sum0 = {1'b0,prodshifted} + t2 + 158'b0 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0}; + assign sum1 = {1'b0,prodshifted} + t2 + 158'b1 + {{56{r2[105]}},r2, 2'b0} + {{56{s2[105]}},s2, 2'b0}; // +1 from invert of z above // Check sign bits in +0/1 modes assign negsum0 = sum0[164]; diff --git a/wally-pipelined/src/fpu/align.sv b/wally-pipelined/src/fpu/align.sv index a63434ba..db336db3 100644 --- a/wally-pipelined/src/fpu/align.sv +++ b/wally-pipelined/src/fpu/align.sv @@ -15,33 +15,26 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE); ///////////////////////////////////////////////////////////////////////////// - input [51:0] zman; // Fraction of addend z; - input [12:0] aligncntE; // amount to shift - input xzeroE; // Input X = 0 - input yzeroE; // Input Y = 0 - input zzeroE; // Input Z = 0 - input zdenormE; // Input Z is denormalized - output [163:0] tE; // aligned addend (54 bits left of bpt) - output bsE; // sticky bit of addend - output killprodE; // Z >> product - output [7:0] sumshiftE; - output sumshiftzeroE; + input logic [51:0] zman; // Fraction of addend z; + input logic [12:0] aligncntE; // amount to shift + input logic xzeroE; // Input X = 0 + input logic yzeroE; // Input Y = 0 + input logic zzeroE; // Input Z = 0 + input logic zdenormE; // Input Z is denormalized + output logic [163:0] tE; // aligned addend (54 bits left of bpt) + output logic bsE; // sticky bit of addend + output logic killprodE; // Z >> product + output logic [8:0] sumshiftE; + output logic sumshiftzeroE; // Internal nodes - reg [163:0] tE; // aligned addend from shifter reg [215:0] shift; // aligned addend from shifter - reg killprodE; // Z >> product - reg bsE; // sticky bit of addend - reg ps; // sticky bit of product - reg zexpsel; // sticky bit of product + logic zexpsel; // sticky bit of product reg [7:0] i; // temp storage for finding sticky bit wire [52:0] z1; // Z plus 1 
wire [51:0] z2; // Z selected after handling rounds - wire [11:0] align104; // alignment count + 104 - logic [8:0] sumshiftE; - logic sumshiftzeroE; - + // Compute sign of aligncntE + 104 to check for shifting too far right @@ -51,18 +44,18 @@ module align(zman, aligncntE, xzeroE, yzeroE, zzeroE, zdenormE, tE, bsE, // Shift addend by alignment count. Generate sticky bits from // addend on right shifts. Handle special cases of shifting // by too much. - - always @(aligncntE or xzeroE or yzeroE or zman or zdenormE or zzeroE) +//***change always @ to always_combs + always_comb begin // Default to clearing sticky bits bsE = 0; - ps = 0; // And to using product as primary operand in adder I exponent gen killprodE = xzeroE | yzeroE; // d = aligncntE // p = 53 + //***try reducing this hardware try getting onw shifter if ($signed(aligncntE) <= $signed(-105)) begin //d<=-2p+1 //product ancored case with saturated shift sumshiftE = 163; // 3p+4 diff --git a/wally-pipelined/src/fpu/booth.sv b/wally-pipelined/src/fpu/booth.sv index 73a5f44c..03511ff6 100644 --- a/wally-pipelined/src/fpu/booth.sv +++ b/wally-pipelined/src/fpu/booth.sv @@ -1,21 +1,19 @@ module booth(xExt, choose, add1, e, pp); ///////////////////////////////////////////////////////////////////////////// - input [53:0] xExt; // multiplicand xExt - input [2:0] choose; // bits needed to choose which encoding - output [1:0] add1; // do you add 1 - output e; - output [54:0] pp; // the resultant encoding + input logic [53:0] xExt; // multiplicand xExt + input logic [2:0] choose; // bits needed to choose which encoding + output logic [1:0] add1; // do you add 1 + output logic e; + output logic [54:0] pp; // the resultant encoding - logic [54:0] pp, temp; - logic e; - logic [1:0] add1; + logic [54:0] temp; logic [53:0] negx; //logic temp; assign negx = ~xExt; - always @(choose, xExt, negx) + always_comb case (choose) 3'b000 : pp = 55'b0; // 0 3'b001 : pp = {1'b0, xExt}; // 1 @@ -27,7 +25,7 @@ module booth(xExt, choose, 
add1, e, pp); 3'b111 : pp = 55'hfffffffffffffff; // -0 endcase - always @(choose, xExt, negx) + always_comb case (choose) 3'b000 : e = 0; // 0 3'b001 : e = 0; // 1 @@ -40,7 +38,7 @@ module booth(xExt, choose, add1, e, pp); endcase // assign add1 = (choose[2] == 1'b1) ? ((choose[1:0] == 2'b11) ? 1'b0 : 1'b1) : 1'b0; // assign add1 = choose[2]; - always @(choose) + always_comb case (choose) 3'b000 : add1 = 2'b0; // 0 3'b001 : add1 = 2'b0; // 1 diff --git a/wally-pipelined/src/fpu/compressors.sv b/wally-pipelined/src/fpu/compressors.sv index 9dd3fc11..89924b38 100644 --- a/wally-pipelined/src/fpu/compressors.sv +++ b/wally-pipelined/src/fpu/compressors.sv @@ -3,11 +3,11 @@ module add3comp2(a, b, c, carry, sum); //look into diffrent implementations of the compressors? parameter BITS = 4; - input [BITS-1:0] a; - input [BITS-1:0] b; - input [BITS-1:0] c; - output [BITS-1:0] carry; - output [BITS-1:0] sum; + input logic [BITS-1:0] a; + input logic [BITS-1:0] b; + input logic [BITS-1:0] c; + output logic [BITS-1:0] carry; + output logic [BITS-1:0] sum; genvar i; generate @@ -22,12 +22,12 @@ module add4comp2(a, b, c, d, carry, sum); ///////////////////////////////////////////////////////////////////////////// parameter BITS = 4; - input [BITS-1:0] a; - input [BITS-1:0] b; - input [BITS-1:0] c; - input [BITS-1:0] d; - output [BITS:0] carry; - output [BITS-1:0] sum; + input logic [BITS-1:0] a; + input logic [BITS-1:0] b; + input logic [BITS-1:0] c; + input logic [BITS-1:0] d; + output logic [BITS:0] carry; + output logic [BITS-1:0] sum; logic [BITS-1:0] cout; logic carryTmp; @@ -54,11 +54,11 @@ module sng3comp2(a, b, c, carry, sum); ///////////////////////////////////////////////////////////////////////////// //look into diffrent implementations of the compressors? 
- input a; - input b; - input c; - output carry; - output sum; + input logic a; + input logic b; + input logic c; + output logic carry; + output logic sum; logic axorb; @@ -73,14 +73,14 @@ module sng4comp2(a, b, c, d, cin, cout, carry, sum); ///////////////////////////////////////////////////////////////////////////// //look into pass gate 4:2 counters? - input a; - input b; - input c; - input d; - input cin; - output cout; - output carry; - output sum; + input logic a; + input logic b; + input logic c; + input logic d; + input logic cin; + output logic cout; + output logic carry; + output logic sum; logic TmpSum; diff --git a/wally-pipelined/src/fpu/expgen1.sv b/wally-pipelined/src/fpu/expgen1.sv index 04b0616a..3bb1a5e7 100644 --- a/wally-pipelined/src/fpu/expgen1.sv +++ b/wally-pipelined/src/fpu/expgen1.sv @@ -20,17 +20,17 @@ module expgen1(xexp, yexp, zexp, xzeroE, yzeroE, aligncntE, prodof, aeE); ///////////////////////////////////////////////////////////////////////////// - input [62:52] xexp; // Exponent of multiplicand x - input [62:52] yexp; // Exponent of multiplicand y - input [62:52] zexp; // Exponent of addend z - input xdenormE; // Z is denorm - input ydenormE; // Z is denorm - input zdenormE; // Z is denorm - input xzeroE; // Z is denorm - input yzeroE; // Z is denorm - output [12:0] aligncntE; // shift count for alignment shifter - output prodof; // X*Y exponent out of bounds - output [12:0] aeE; //exponent of multiply + input logic [62:52] xexp; // Exponent of multiplicand x + input logic [62:52] yexp; // Exponent of multiplicand y + input logic [62:52] zexp; // Exponent of addend z + input logic xdenormE; // Z is denorm + input logic ydenormE; // Z is denorm + input logic zdenormE; // Z is denorm + input logic xzeroE; // Z is denorm + input logic yzeroE; // Z is denorm + output logic [12:0] aligncntE; // shift count for alignment shifter + output logic prodof; // X*Y exponent out of bounds + output logic [12:0] aeE; //exponent of multiply // 
Internal nodes diff --git a/wally-pipelined/src/fpu/expgen2.sv b/wally-pipelined/src/fpu/expgen2.sv index a230ee33..68c36ddd 100644 --- a/wally-pipelined/src/fpu/expgen2.sv +++ b/wally-pipelined/src/fpu/expgen2.sv @@ -23,24 +23,24 @@ module expgen2(xexp, yexp, zexp, sumof, sumuf); ///////////////////////////////////////////////////////////////////////////// - input [62:52] xexp; // Exponent of multiplicand x - input [62:52] yexp; // Exponent of multiplicand y - input [62:52] zexp; // Exponent of addend z - input sumzero; // sum exactly equals zero - input resultdenorm; // postnormalize rounded result - input infinity; // generate infinity on overflow - input [4:0] FmaFlagsM; // Result invalid - input inf; // Some input is infinity - input nanM; // Some input is NaN - input [12:0] de0; // X is NaN NaN - input xnanM; // X is NaN - input ynanM; // Y is NaN - input znanM; // Z is NaN - input expplus1; - input specialsel; // Select special result - output [62:52] wexp; // Exponent of result - output sumof; // X*Y+Z exponent out of bounds - output sumuf; // X*Y+Z exponent underflows + input logic [62:52] xexp; // Exponent of multiplicand x + input logic [62:52] yexp; // Exponent of multiplicand y + input logic [62:52] zexp; // Exponent of addend z + input logic sumzero; // sum exactly equals zero + input logic resultdenorm; // postnormalize rounded result + input logic infinity; // generate infinity on overflow + input logic [4:0] FmaFlagsM; // Result invalid + input logic inf; // Some input is infinity + input logic nanM; // Some input is NaN + input logic [12:0] de0; // X is NaN NaN + input logic xnanM; // X is NaN + input logic ynanM; // Y is NaN + input logic znanM; // Z is NaN + input logic expplus1; + input logic specialsel; // Select special result + output logic [62:52] wexp; // Exponent of result + output logic sumof; // X*Y+Z exponent out of bounds + output logic sumuf; // X*Y+Z exponent underflows // Internal nodes diff --git a/wally-pipelined/src/fpu/flag1.sv 
b/wally-pipelined/src/fpu/flag1.sv index 2aad1d43..3a5f74a5 100644 --- a/wally-pipelined/src/fpu/flag1.sv +++ b/wally-pipelined/src/fpu/flag1.sv @@ -11,15 +11,15 @@ module flag1(xnanE, ynanE, znanE, prodof, prodinfE, nanE); ///////////////////////////////////////////////////////////////////////////// - input xnanE; // X is NaN - input ynanE; // Y is NaN - input znanE; // Z is NaN - input prodof; // X*Y overflows exponent - output nanE; // Some source is NaN + input logic xnanE; // X is NaN + input logic ynanE; // Y is NaN + input logic znanE; // Z is NaN + input logic prodof; // X*Y overflows exponent + output logic nanE; // Some source is NaN // Internal nodes - output prodinfE; // X*Y larger than max possible + output logic prodinfE; // X*Y larger than max possible // If any input is NaN, propagate the NaN diff --git a/wally-pipelined/src/fpu/flag2.sv b/wally-pipelined/src/fpu/flag2.sv index 17650ecf..c1852cc8 100644 --- a/wally-pipelined/src/fpu/flag2.sv +++ b/wally-pipelined/src/fpu/flag2.sv @@ -13,27 +13,27 @@ module flag2(xsign,ysign,zsign, xnanM, ynanM, znanM, xinfM, yinfM, zinfM, sumof, inf, nanM, FmaFlagsM,sticky,prodinfM); ///////////////////////////////////////////////////////////////////////////// - input xnanM; // X is NaN - input ynanM; // Y is NaN - input znanM; // Z is NaN - input xsign; // Sign of z - input ysign; // Sign of z - input zsign; // Sign of z - input sticky; // X is Inf - input prodinfM; - input xinfM; // X is Inf - input yinfM; // Y is Inf - input zinfM; // Z is Inf - input sumof; // X*Y + z underflows exponent - input sumuf; // X*Y + z underflows exponent - input xzeroM; // x = 0 - input yzeroM; // y = 0 - input zzeroM; // y = 0 - input killprodM; - input [1:0] vbits; // R and S bits of result - output inf; // Some source is Inf - output nanM; // Some source is NaN - output [4:0] FmaFlagsM; + input logic xnanM; // X is NaN + input logic ynanM; // Y is NaN + input logic znanM; // Z is NaN + input logic xsign; // Sign of z + input logic 
ysign; // Sign of z + input logic zsign; // Sign of z + input logic sticky; // X is Inf + input logic prodinfM; + input logic xinfM; // X is Inf + input logic yinfM; // Y is Inf + input logic zinfM; // Z is Inf + input logic sumof; // X*Y + z underflows exponent + input logic sumuf; // X*Y + z underflows exponent + input logic xzeroM; // x = 0 + input logic yzeroM; // y = 0 + input logic zzeroM; // y = 0 + input logic killprodM; + input logic [1:0] vbits; // R and S bits of result + output logic inf; // Some source is Inf + input logic nanM; // Some source is NaN + output logic [4:0] FmaFlagsM; // Internal nodes diff --git a/wally-pipelined/src/fpu/fma1.sv b/wally-pipelined/src/fpu/fma1.sv index de763cb4..fe547e4e 100644 --- a/wally-pipelined/src/fpu/fma1.sv +++ b/wally-pipelined/src/fpu/fma1.sv @@ -34,34 +34,34 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE, , xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE, nanE, prodinfE); ///////////////////////////////////////////////////////////////////////////// - - input [63:0] ReadData1E; // input 1 - input [63:0] ReadData2E; // input 2 - input [63:0] ReadData3E; // input 3 - input [2:0] FrmE; // Rounding mode - output [12:0] aligncntE; // status flags - output [105:0] rE; // one result of partial product sum - output [105:0] sE; // other result of partial products - output [163:0] tE; // output of alignment shifter - output [12:0] aeE; // multiplier expoent - output bsE; // sticky bit of addend - output killprodE; // ReadData3E >> product - output xzeroE; - output yzeroE; - output zzeroE; - output xdenormE; - output ydenormE; - output zdenormE; - output xinfE; - output yinfE; - output zinfE; - output xnanE; - output ynanE; - output znanE; - output nanE; - output prodinfE; - output [8:0] sumshiftE; - output sumshiftzeroE; + //***clean up code, comment, fix names, and c3f000200003fffe * 0000000000000001 + 001ffffffffffffe error + input logic [63:0] ReadData1E; // input 
1 + input logic [63:0] ReadData2E; // input 2 + input logic [63:0] ReadData3E; // input 3 + input logic [2:0] FrmE; // Rounding mode + output logic [12:0] aligncntE; // status flags + output logic [105:0] rE; // one result of partial product sum + output logic [105:0] sE; // other result of partial products + output logic [163:0] tE; // output of alignment shifter + output logic [12:0] aeE; // multiplier expoent + output logic bsE; // sticky bit of addend + output logic killprodE; // ReadData3E >> product + output logic xzeroE; + output logic yzeroE; + output logic zzeroE; + output logic xdenormE; + output logic ydenormE; + output logic zdenormE; + output logic xinfE; + output logic yinfE; + output logic zinfE; + output logic xnanE; + output logic ynanE; + output logic znanE; + output logic nanE; + output logic prodinfE; + output logic [8:0] sumshiftE; + output logic sumshiftzeroE; // Internal nodes diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 8d40ab97..e23be816 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -38,40 +38,37 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, ); ///////////////////////////////////////////////////////////////////////////// - - input [63:0] ReadData1M; // input 1 - input [63:0] ReadData2M; // input 2 - input [63:0] ReadData3M; // input 3 - input [2:0] FrmM; // Rounding mode - input [12:0] aligncntM; // status flags - input [105:0] rM; // one result of partial product sum - input [105:0] sM; // other result of partial products - input [163:0] tM; // output of alignment shifter - input [8:0] normcntM; // shift count for normalizer - input [12:0] aeM; // multiplier expoent - input bsM; // sticky bit of addend - input killprodM; // ReadData3M >> product - input prodinfM; - input xzeroM; - input yzeroM; - input zzeroM; - input xdenormM; - input ydenormM; - input zdenormM; - input xinfM; - input yinfM; - input zinfM; - input xnanM; - input ynanM; - input 
znanM; - input nanM; - input [8:0] sumshiftM; - input sumshiftzeroM; - - - input [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M - output [4:0] FmaFlagsM; // status flags - + + input logic [63:0] ReadData1M; // input 1 + input logic [63:0] ReadData2M; // input 2 + input logic [63:0] ReadData3M; // input 3 + input logic [2:0] FrmM; // Rounding mode + input logic [12:0] aligncntM; // status flags + input logic [105:0] rM; // one result of partial product sum + input logic [105:0] sM; // other result of partial products + input logic [163:0] tM; // output of alignment shifter + input logic [8:0] normcntM; // shift count for normalizer + input logic [12:0] aeM; // multiplier exponent + input logic bsM; // sticky bit of addend + input logic killprodM; // ReadData3M >> product + input logic prodinfM; + input logic xzeroM; + input logic yzeroM; + input logic zzeroM; + input logic xdenormM; + input logic ydenormM; + input logic zdenormM; + input logic xinfM; + input logic yinfM; + input logic zinfM; + input logic xnanM; + input logic ynanM; + input logic znanM; + input logic nanM; + input logic [8:0] sumshiftM; + input logic sumshiftzeroM; + output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M + output logic [4:0] FmaFlagsM; // status flags // Internal nodes logic [163:0] sum; // output of carry prop adder diff --git a/wally-pipelined/src/fpu/fpucmp1.sv b/wally-pipelined/src/fpu/fpucmp1.sv index 5b269b89..71bdea3b 100755 --- a/wally-pipelined/src/fpu/fpucmp1.sv +++ b/wally-pipelined/src/fpu/fpucmp1.sv @@ -208,7 +208,6 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); output logic BNaN; output logic Azero; output logic Bzero; - logic [62:0] sixtythreezeros = 63'h0; assign dp = !Sel[1]&!Sel[0]; assign sp = !Sel[1]&Sel[0]; @@ -229,7 +228,7 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); // the 63 least siginficant bits of A are zero). 
// Depending on how this synthesizes, it may work better to replace // this with assign Azero = ~(A[62] | A[61] | ... | A[0]) - assign Azero = (A[62:0] == sixtythreezeros); - assign Bzero = (B[62:0] == sixtythreezeros); + assign Azero = (A[62:0] == 63'h0); + assign Bzero = (B[62:0] == 63'h0); endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/lza.sv b/wally-pipelined/src/fpu/lza.sv index 5574dc32..d802b564 100644 --- a/wally-pipelined/src/fpu/lza.sv +++ b/wally-pipelined/src/fpu/lza.sv @@ -12,22 +12,21 @@ module lza(sum, normcnt, sumzero); ///////////////////////////////////////////////////////////////////////////// - input [163:0] sum; // sum - output [8:0] normcnt; // normalization shift count - output sumzero; // sum = 0 + input logic [163:0] sum; // sum + output logic [8:0] normcnt; // normalization shift count + output logic sumzero; // sum = 0 // Internal nodes reg [8:0] i; // loop index - reg [8:0] normcnt; // normalization shift count - + // A real LOP uses a fast carry chain to find only the first 0. // It is an example of a parallel prefix algorithm. For the sake // of simplicity, this model is behavioral instead. // A real LOP would also operate on the sources of the adder, not // the result! 
- always @ ( sum) + always_comb begin i = 0; while (~sum[163-i] && i <= 163) i = i+1; // search for leading one diff --git a/wally-pipelined/src/fpu/multiply.sv b/wally-pipelined/src/fpu/multiply.sv index 351df52c..9961b494 100644 --- a/wally-pipelined/src/fpu/multiply.sv +++ b/wally-pipelined/src/fpu/multiply.sv @@ -2,14 +2,14 @@ module multiply(xman, yman, xdenormE, ydenormE, xzeroE, yzeroE, rE, sE); ///////////////////////////////////////////////////////////////////////////// - input [51:0] xman; // Fraction of multiplicand x - input [51:0] yman; // Fraction of multiplicand y - input xdenormE; // is x denormalized - input ydenormE; // is y denormalized - input xzeroE; // Z is denorm - input yzeroE; // Z is denorm - output [105:0] rE; // partial product 1 - output [105:0] sE; // partial product 2 + input logic [51:0] xman; // Fraction of multiplicand x + input logic [51:0] yman; // Fraction of multiplicand y + input logic xdenormE; // is x denormalized + input logic ydenormE; // is y denormalized + input logic xzeroE; // Z is denorm + input logic yzeroE; // Z is denorm + output logic [105:0] rE; // partial product 1 + output logic [105:0] sE; // partial product 2 wire [54:0] yExt; //y with appended 0 and assumed 1 wire [53:0] xExt; //y with assumed 1 diff --git a/wally-pipelined/src/fpu/normalize.sv b/wally-pipelined/src/fpu/normalize.sv index 6e4b9753..4582d1f2 100644 --- a/wally-pipelined/src/fpu/normalize.sv +++ b/wally-pipelined/src/fpu/normalize.sv @@ -17,35 +17,31 @@ module normalize(sum, zexp, normcnt, aeM, aligncntM, sumshiftM, sumshiftzeroM, sumzero, xzeroM, zzeroM, yzeroM, bsM, xdenormM, ydenormM, zdenormM, sticky, de0, resultdenorm, v); ///////////////////////////////////////////////////////////////////////////// - input [163:0] sum; // sum - input [62:52] zexp; // sum - input [8:0] normcnt; // normalization shift count - input [12:0] aeM; // normalization shift count - input [12:0] aligncntM; // normalization shift count - input [8:0] sumshiftM; // 
normalization shift count - input sumshiftzeroM; - input sumzero; // sum is zero - input bsM; // sticky bit for addend - input xdenormM; // Input Z is denormalized - input ydenormM; // Input Z is denormalized - input zdenormM; // Input Z is denormalized - input xzeroM; - input yzeroM; - input zzeroM; - output sticky; //sticky bit - output [12:0] de0; - output resultdenorm; // Input Z is denormalized - output [53:0] v; // normalized sum, R, S bits + input logic [163:0] sum; // sum + input logic [62:52] zexp; // sum + input logic [8:0] normcnt; // normalization shift count + input logic [12:0] aeM; // normalization shift count + input logic [12:0] aligncntM; // normalization shift count + input logic [8:0] sumshiftM; // normalization shift count + input logic sumshiftzeroM; + input logic sumzero; // sum is zero + input logic bsM; // sticky bit for addend + input logic xdenormM; // Input Z is denormalized + input logic ydenormM; // Input Z is denormalized + input logic zdenormM; // Input Z is denormalized + input logic xzeroM; + input logic yzeroM; + input logic zzeroM; + output logic sticky; //sticky bit + output logic [12:0] de0; + output logic resultdenorm; // Input Z is denormalized + output logic [53:0] v; // normalized sum, R, S bits // Internal nodes - reg [53:0] v; // normalized sum, R, S bits - logic resultdenorm; // Input Z is denormalized - logic [12:0] de0; - logic [163:0] sumshifted; // shifted sum +logic [163:0] sumshifted; // shifted sum logic [9:0] sumshifttmp; logic [163:0] sumshiftedtmp; // shifted sum - logic sticky; logic isShiftLeft1; logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5; @@ -62,7 +58,7 @@ logic tmp,tmp1,tmp2,tmp3,tmp4, tmp5; assign isShiftLeft1 = (aligncntM == 1 ||aligncntM == 0 || $signed(aligncntM) == $signed(-1))&& zexp == 11'h2;//((xexp == 11'h3ff && yexp == 11'h1) || (yexp == 11'h3ff && xexp == 11'h1)) && zexp == 11'h2; assign tmp = ($signed(aeM-normcnt+2) >= $signed(-1022)); - always @(sum or sumshiftM or aeM or aligncntM or normcnt or bsM 
or isShiftLeft1 or zexp or zdenormM) + always_comb begin // d = aligncntM // l = normcnt diff --git a/wally-pipelined/src/fpu/round.sv b/wally-pipelined/src/fpu/round.sv index 675b046a..bfe519ae 100644 --- a/wally-pipelined/src/fpu/round.sv +++ b/wally-pipelined/src/fpu/round.sv @@ -19,23 +19,23 @@ module round(v, sticky, FrmM, wsign, wman, infinity, specialsel,expplus1); ///////////////////////////////////////////////////////////////////////////// - input [53:0] v; // normalized sum, R, S bits - input sticky; //sticky bit - input [2:0] FrmM; - input wsign; // Sign of result - input [4:0] FmaFlagsM; - input inf; // Some input is infinity - input nanM; // Some input is NaN - input xnanM; // X is NaN - input ynanM; // Y is NaN - input znanM; // Z is NaN - input [51:0] xman; // Input X - input [51:0] yman; // Input Y - input [51:0] zman; // Input Z - output [51:0] wman; // rounded result of FMAC - output infinity; // Generate infinity on overflow - output specialsel; // Select special result - output expplus1; + input logic [53:0] v; // normalized sum, R, S bits + input logic sticky; //sticky bit + input logic [2:0] FrmM; + input logic wsign; // Sign of result + input logic [4:0] FmaFlagsM; + input logic inf; // Some input is infinity + input logic nanM; // Some input is NaN + input logic xnanM; // X is NaN + input logic ynanM; // Y is NaN + input logic znanM; // Z is NaN + input logic [51:0] xman; // Input X + input logic [51:0] yman; // Input Y + input logic [51:0] zman; // Input Z + output logic [51:0] wman; // rounded result of FMAC + output logic infinity; // Generate infinity on overflow + output logic specialsel; // Select special result + output logic expplus1; // Internal nodes @@ -56,7 +56,7 @@ module round(v, sticky, FrmM, wsign, // 0xx - do nothing // 100 - tie - plus1 if v[2] = 1 // 101/110/111 - plus1 - always @ (FrmM, v, wsign, sticky) begin + always_comb begin case (FrmM) 3'b000: plus1 = (v[1] & (v[0] | sticky | (~v[0]&~sticky&v[2])));//round to 
nearest even 3'b001: plus1 = 0;//round to zero diff --git a/wally-pipelined/src/fpu/sbtm.sv b/wally-pipelined/src/fpu/sbtm.sv index 9feb5bb3..1b4f47c9 100644 --- a/wally-pipelined/src/fpu/sbtm.sv +++ b/wally-pipelined/src/fpu/sbtm.sv @@ -11,7 +11,8 @@ module sbtm (input logic [11:0] a, output logic [10:0] ia_out); // input to CPA logic [14:0] op1; logic [14:0] op2; - logic [14:0] p; + logic [14:0] p; + logic cout; assign x0 = a[10:7]; assign x1 = a[6:4]; diff --git a/wally-pipelined/src/fpu/sign.sv b/wally-pipelined/src/fpu/sign.sv index 997bea22..b4a31fb1 100644 --- a/wally-pipelined/src/fpu/sign.sv +++ b/wally-pipelined/src/fpu/sign.sv @@ -14,32 +14,28 @@ module sign(xsign, ysign, zsign, negsum0, negsum1, bsM, FrmM, FmaFlagsM, sumzero, zinfM, inf, wsign, invz, negsum, selsum1, isAdd); ////////////////////////////////////////////////////////////////////////////I - input xsign; // Sign of X - input ysign; // Sign of Y - input zsign; // Sign of Z - input isAdd; - input negsum0; // Sum in +O mode is negative - input negsum1; // Sum in +1 mode is negative - input bsM; // sticky bit from addend - input [2:0] FrmM; // Round toward minus infinity - input [4:0] FmaFlagsM; // Round toward minus infinity - input sumzero; // Sum = O - input zinfM; // Y = Inf - input inf; // Some input = Inf - output wsign; // Sign of W - output invz; // Invert addend into adder - output negsum; // Negate result of adder - output selsum1; // Select +1 mode from compound adder + input logic xsign; // Sign of X + input logic ysign; // Sign of Y + input logic zsign; // Sign of Z + input logic isAdd; + input logic negsum0; // Sum in +O mode is negative + input logic negsum1; // Sum in +1 mode is negative + input logic bsM; // sticky bit from addend + input logic [2:0] FrmM; // Round toward minus infinity + input logic [4:0] FmaFlagsM; // Round toward minus infinity + input logic sumzero; // Sum = O + input logic zinfM; // Y = Inf + input logic inf; // Some input = Inf + output logic wsign; // 
Sign of W + output logic invz; // Invert addend into adder + output logic negsum; // Negate result of adder + output logic selsum1; // Select +1 mode from compound adder // Internal nodes wire zerosign; // sign if result= 0 wire sumneg; // sign if result= 0 wire infsign; // sign if result= Inf - reg negsum; // negate result of adder - reg selsum1; // select +1 mode from compound adder -logic tmp; - // Compute sign of product assign psign = xsign ^ ysign; diff --git a/wally-pipelined/src/fpu/special.sv b/wally-pipelined/src/fpu/special.sv index a290eb91..76e69f18 100644 --- a/wally-pipelined/src/fpu/special.sv +++ b/wally-pipelined/src/fpu/special.sv @@ -14,21 +14,21 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE, xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE); ///////////////////////////////////////////////////////////////////////////// - input [63:0] ReadData1E; // Input ReadData1E - input [63:0] ReadData2E; // Input ReadData2E - input [63:0] ReadData3E; // Input ReadData3E - output xzeroE; // Input ReadData1E = 0 - output yzeroE; // Input ReadData2E = 0 - output zzeroE; // Input ReadData3E = 0 - output xnanE; // ReadData1E is NaN - output ynanE; // ReadData2E is NaN - output znanE; // ReadData3E is NaN - output xdenormE; // ReadData1E is denormalized - output ydenormE; // ReadData2E is denormalized - output zdenormE; // ReadData3E is denormalized - output xinfE; // ReadData1E is infinity - output yinfE; // ReadData2E is infinity - output zinfE; // ReadData3E is infinity + input logic [63:0] ReadData1E; // Input ReadData1E + input logic [63:0] ReadData2E; // Input ReadData2E + input logic [63:0] ReadData3E; // Input ReadData3E + output logic xzeroE; // Input ReadData1E = 0 + output logic yzeroE; // Input ReadData2E = 0 + output logic zzeroE; // Input ReadData3E = 0 + output logic xnanE; // ReadData1E is NaN + output logic ynanE; // ReadData2E is NaN + output logic znanE; // ReadData3E is NaN + output logic 
xdenormE; // ReadData1E is denormalized + output logic ydenormE; // ReadData2E is denormalized + output logic zdenormE; // ReadData3E is denormalized + output logic xinfE; // ReadData1E is infinity + output logic yinfE; // ReadData2E is infinity + output logic zinfE; // ReadData3E is infinity // In the actual circuit design, the gates looking at bits // 51:0 and at bits 62:52 should be shared among the various detectors.