diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv index cd3e2a4d..bf09314e 100644 --- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv +++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv @@ -48,7 +48,7 @@ assign FOpCtrlE = 3'b0; // up - 011 // nearest max mag - 100 assign FrmE = 3'b000; -assign FmtE = 1'b0; +assign FmtE = 1'b1; logic [`FLEN-1:0] X, Y, Z; // logic FmtE; @@ -76,9 +76,9 @@ assign FmtE = 1'b0; assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31]; assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31]; - assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; - assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; - assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]};//{Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; + assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; + assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; + assign ZExpE = FmtE ? Addend[62:52] : {Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0}; assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0}; @@ -122,7 +122,7 @@ assign FmtE = 1'b0; assign YZeroE = YExpZero & YFracZero; assign ZZeroE = ZExpZero & ZFracZero; - assign BiasE = FmtE ? {1'b0, {`NE-1{1'b1}}} : 13'h7f; + assign BiasE = 13'h3ff; assign wnan = FmtE ? &FMAResM[`FLEN-2:`NF] && |FMAResM[`NF-1:0] : &FMAResM[30:23] && |FMAResM[22:0]; // assign XNaNE = FmtE ? &X[62:52] && |X[51:0] : &X[62:55] && |X[54:32]; @@ -203,7 +203,7 @@ always @(posedge clk) if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); errors = errors + 1; - //if (errors == 10) + if (errors == 10) $stop; end vectornum = vectornum + 1; diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh index c7cf5f09..0741e9d6 100755 --- a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh +++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh @@ -1,3 +1,3 @@ -testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat +testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat tr -d ' ' < testFloat > testFloatNoSpace diff --git a/wally-pipelined/src/fpu/convert_inputs.sv b/wally-pipelined/src/fpu/convert_inputs.sv index 628519a7..bf56cb00 100755 --- a/wally-pipelined/src/fpu/convert_inputs.sv +++ b/wally-pipelined/src/fpu/convert_inputs.sv @@ -8,7 +8,7 @@ module convert_inputs( input [63:0] op1, // 1st input operand (A) input [63:0] op2, // 2nd input operand (B) - input [3:0] op_type, // Function opcode + input [2:0] op_type, // Function opcode input P, // Result Precision (0 for double, 1 for single) output [63:0] Float1, // Converted 1st input operand @@ -16,8 +16,6 @@ module convert_inputs( ); wire conv_SP; // Convert from SP to DP - wire negate; // Operation is negation - wire abs_val; // Operation is absolute value wire Zexp1; // One if the exponent of op1 is zero wire Zexp2; // One if the exponent of op2 is zero wire Oexp1; // One if the exponent of op1 is all ones @@ -25,7 +23,7 @@ module convert_inputs( // Convert from single precision to double precision if (op_type is 11X // and P is 0) or (op_type is not 11X and P is one). - assign conv_SP = (op_type[2]&op_type[1]) ^ P; + assign conv_SP = ~P; // Test if the input exponent is zero, because if it is then the // exponent of the converted number should be zero. @@ -40,17 +38,14 @@ module convert_inputs( assign Float1[28:0] = op1[28:0] & {29{~conv_SP}}; // Conditionally convert op2. Lower 29 bits are zero for single precision. - assign Float2[62:29] = conv_SP ? {op2[30], - {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]} + assign Float2[62:29] = conv_SP ? {op2[30], {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]} : op2[62:29]; assign Float2[28:0] = op2[28:0] & {29{~conv_SP}}; // Set the sign of Float1 based on its original sign and if the operation // is negation (op_type = 101) or absolute value (op_type = 100) - assign negate = op_type[2] & ~op_type[1] & op_type[0]; - assign abs_val = op_type[2] & ~op_type[1] & ~op_type[0]; //*** remove abs_val - assign Float1[63] = conv_SP ? (op1[31] ^ negate) & ~abs_val : (op1[63] ^ negate) & ~abs_val; + assign Float1[63] = conv_SP ? op1[31] : op1[63]; assign Float2[63] = conv_SP ? op2[31] : op2[63]; endmodule // convert_inputs diff --git a/wally-pipelined/src/fpu/exception.sv b/wally-pipelined/src/fpu/exception.sv index c24586a1..bccfa01f 100755 --- a/wally-pipelined/src/fpu/exception.sv +++ b/wally-pipelined/src/fpu/exception.sv @@ -1,95 +1,58 @@ // Exception logic for the floating point adder. Note: We may // actually want to move to where the result is computed. -module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type); +module exception ( - input [63:0] A; // 1st input operand (op1) - input [63:0] B; // 2nd input operand (op2) - input [3:0] op_type; // Function opcode - output [3:0] Ztype; // Indicates type of result (Z) - output Invalid; // Invalid operation exception - output Denorm; // Denormalized input - output ANorm; // A is not zero or Denorm - output BNorm; // B is not zero or Denorm - output Sub; // The effective operation is subtraction - wire AzeroM; // '1' if the mantissa of A is zero - wire BzeroM; // '1' if the mantissa of B is zero - wire AzeroE; // '1' if the exponent of A is zero - wire BzeroE; // '1' if the exponent of B is zero - wire AonesE; // '1' if the exponent of A is all ones - wire BonesE; // '1' if the exponent of B is all ones - wire ADenorm; // '1' if A is a denomalized number - wire BDenorm; // '1' if B is a denomalized number - wire AInf; // '1' if A is infinite - wire BInf; // '1' if B is infinite - wire AZero; // '1' if A is 0 - wire BZero; // '1' if B is 0 - wire ANaN; // '1' if A is a not-a-number - wire BNaN; // '1' if B is a not-a-number - wire ASNaN; // '1' if A is a signalling not-a-number - wire BSNaN; // '1' if B is a signalling not-a-number + input logic [2:0] op_type, // Function opcode + input logic XSgnE, YSgnE, + // input logic [52:0] XManE, YManE, + input logic XDenormE, YDenormE, + input logic XNormE, YNormE, + input logic XZeroE, YZeroE, + input logic XInfE, YInfE, + input logic XNaNE, YNaNE, + input logic XSNaNE, YSNaNE, + output logic [3:0] Ztype, // Indicates type of result (Z) + output logic Invalid, // Invalid operation exception + output logic Denorm, // Denormalized logic + output logic Sub // The effective operation is subtraction +); wire ZQNaN; // '1' if result Z is a quiet NaN wire ZPInf; // '1' if result Z positive infnity wire ZNInf; // '1' if result Z negative infnity wire add_sub; // '1' if operation is add or subtract wire converts; // See if there are any converts - parameter [51:0] fifty_two_zeros = 52'h0000000000000; // Use parameter? // Is this instruction a convert - assign converts = ~(~op_type[1] & ~op_type[2]); + assign converts = op_type[1]; - // Determine if mantissas are all zeros - assign AzeroM = (A[51:0] == fifty_two_zeros); - assign BzeroM = (B[51:0] == fifty_two_zeros); - // Determine if exponents are all ones or all zeros - assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]; - assign BonesE = B[62]&B[61]&B[60]&B[59]&B[58]&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]; - assign AzeroE = ~(A[62]|A[61]|A[60]|A[59]|A[58]|A[57]|A[56]|A[55]|A[54]|A[53]|A[52]); - assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]); - - // Determine special cases. Note: Zero is not really a special case. - assign ADenorm = AzeroE & ~AzeroM; - assign BDenorm = BzeroE & ~BzeroM; - assign AInf = AonesE & AzeroM; - assign BInf = BonesE & BzeroM; - assign ANaN = AonesE & ~AzeroM; - assign BNaN = BonesE & ~BzeroM; - assign ASNaN = ANaN & ~A[51]; - assign BSNaN = BNaN & ~B[51]; - assign AZero = AzeroE & AzeroM; - assign BZero = BzeroE & BzeroE; - - // A and B are normalized if their exponents are not zero. - assign ANorm = ~AzeroE; - assign BNorm = ~BzeroE; // An "Invalid Operation" exception occurs if (A or B is a signalling NaN) // or (A and B are both Infinite and the "effective operation" is // subtraction). - assign add_sub = ~op_type[2] & ~op_type[1]; - assign Invalid = (ASNaN | BSNaN | - (add_sub & AInf & BInf & (A[63]^B[63]^op_type[0]))) & ~converts; + assign add_sub = ~op_type[1]; + assign Invalid = (XSNaNE | YSNaNE | (add_sub & XInfE & YInfE & (XSgnE^YSgnE^op_type[0]))) & ~converts; // The Denorm flag is set if (A is denormlized and the operation is not integer // conversion ) or (if B is normalized and the operation is addition or subtraction). - assign Denorm = ADenorm&(op_type[2]|~op_type[1]) | BDenorm & add_sub; + assign Denorm = XDenormE | YDenormE & add_sub; // The result is a quiet NaN if (an "Invalid Operation" exception occurs) // or (A is a NaN) or (B is a NaN and the operation uses B). - assign ZQNaN = Invalid | ANaN | (BNaN & add_sub); + assign ZQNaN = Invalid | XNaNE | (YNaNE & add_sub); // The result is +Inf if ((A is +Inf) or (B is -Inf and the operation is // subtraction) or (B is +Inf and the operation is addition)) and (the // result is not a quiet NaN). - assign ZPInf = (AInf&A[63] | add_sub&BInf&(~B[63]^op_type[0]))&~ZQNaN; + assign ZPInf = (XInfE&XSgnE | add_sub&YInfE&(~YSgnE^op_type[0]))&~ZQNaN; // The result is -Inf if ((A is -Inf) or (B is +Inf and the operation is // subtraction) or (B is -Inf and the operation is addition)) and the // result is not a quiet NaN. - assign ZNInf = (AInf&~A[63] | add_sub&BInf&(B[63]^op_type[0]))&~ZQNaN; + assign ZNInf = (XInfE&~XSgnE | add_sub&YInfE&(YSgnE^op_type[0]))&~ZQNaN; // Set the type of the result as follows: // (needs optimization - got lazy or was late) @@ -102,19 +65,19 @@ module exception (Ztype, Invalid, Denorm, ANorm, BNorm, Sub, A, B, op_type); // 0101 +Bzero and -Azero (and vice-versa) // 1000 Convert SP to DP (and vice-versa) - assign Ztype[0] = ((ZQNaN | ZPInf) & ~(~op_type[2] & op_type[1])) | - ((AZero & BZero & (A[63]^B[63]^op_type[0])) + assign Ztype[0] = (ZQNaN | ZPInf) | + ((XZeroE & YZeroE & (XSgnE^YSgnE^op_type[0])) & ~converts); - assign Ztype[1] = ((ZNInf | ZPInf) & ~(~op_type[2] & op_type[1])) | - (((AZero & BZero & A[63] & B[63] & ~op_type[0]) | - (AZero & BZero & A[63] & ~B[63] & op_type[0])) + assign Ztype[1] = (ZNInf | ZPInf) | + (((XZeroE & YZeroE & XSgnE & YSgnE & ~op_type[0]) | + (XZeroE & YZeroE & XSgnE & ~YSgnE & op_type[0])) & ~converts); - assign Ztype[2] = ((AZero & BZero & ~op_type[1] & ~op_type[2]) + assign Ztype[2] = ((XZeroE & YZeroE & ~op_type[1]) & ~converts); - assign Ztype[3] = (op_type[1] & op_type[2] & ~op_type[0]); + assign Ztype[3] = (op_type[1] & ~op_type[0]); // Determine if the effective operation is subtraction - assign Sub = ~(op_type[3] & ~op_type[0]) & ( (op_type[3] & op_type[0]) | (add_sub & (A[63]^B[63]^op_type[0])) ); + assign Sub = add_sub & (XSgnE^YSgnE^op_type[0]); endmodule // exception diff --git a/wally-pipelined/src/fpu/exception_div.sv b/wally-pipelined/src/fpu/exception_div.sv index e917f127..37432068 100755 --- a/wally-pipelined/src/fpu/exception_div.sv +++ b/wally-pipelined/src/fpu/exception_div.sv @@ -27,7 +27,7 @@ module exception_div ( logic ZInf; // '1' if result Z is an infnity logic Zero; // '1' if result is zero - + //***take this module out and add more registers or just recalculate it all // Determine if mantissas are all zeros assign AzeroM = (A[51:0] == 52'h0); assign BzeroM = (B[51:0] == 52'h0); diff --git a/wally-pipelined/src/fpu/faddcvt.sv b/wally-pipelined/src/fpu/faddcvt.sv index a604f887..e09deae6 100755 --- a/wally-pipelined/src/fpu/faddcvt.sv +++ b/wally-pipelined/src/fpu/faddcvt.sv @@ -33,9 +33,22 @@ module faddcvt( input logic StallM, // stall the memory stage input logic [63:0] FSrcXE, // 1st input operand (A) input logic [63:0] FSrcYE, // 2nd input operand (B) - input logic [3:0] FOpCtrlE, FOpCtrlM, // Function opcode + input logic [2:0] FOpCtrlE, FOpCtrlM, // Function opcode input logic FmtE, FmtM, // Result Precision (0 for double, 1 for single) input logic [2:0] FrmM, // Rounding mode - specify values + input logic XSgnE, YSgnE, + input logic [52:0] XManE, YManE, + input logic [10:0] XExpE, YExpE, + input logic XSgnM, YSgnM, + input logic [52:0] XManM, YManM, + input logic [10:0] XExpM, YExpM, + input logic XDenormE, YDenormE, + input logic XNormE, YNormE, + input logic XNormM, YNormM, + input logic XZeroE, YZeroE, + input logic XInfE, YInfE, + input logic XNaNE, YNaNE, + input logic XSNaNE, YSNaNE, output logic [63:0] FAddResM, // Result of operation output logic [4:0] FAddFlgM); // IEEE exception flags @@ -44,53 +57,53 @@ module faddcvt( logic [3:0] AddSelInvE, AddSelInvM; logic [10:0] AddExpPostSumE,AddExpPostSumM; logic AddCorrSignE, AddCorrSignM; - logic AddOp1NormE, AddOp1NormM; - logic AddOp2NormE, AddOp2NormM; logic AddOpANormE, AddOpANormM; logic AddOpBNormE, AddOpBNormM; logic AddInvalidE, AddInvalidM; logic AddDenormInE, AddDenormInM; logic AddSwapE, AddSwapM; logic AddSignAE, AddSignAM; - logic AddConvertE, AddConvertM; - logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; logic [10:0] AddExponentE, AddExponentM; - fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + fpuaddcvt1 fpadd1 (.FOpCtrlE, .FmtE, .AddExponentE, .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, - .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, - .AddDenormInE, .AddConvertE, .AddSwapE); + .XSgnE, .YSgnE,.XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, + .AddCorrSignE, .AddSignAE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddSwapE); // E/M pipeline registers flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(14) EMRegAdd9(clk, reset, FlushM, ~StallM, - {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddSignAE}, - {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM}); + flopenrc #(11) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddSwapE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddSwapM, AddSignAM}); - fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, - .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, - .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, - .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .XNormM, .YNormM, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM, + .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddSwapM, .FAddResM, .FAddFlgM); endmodule module fpuaddcvt1 ( - input logic [63:0] FSrcXE, // 1st input operand (A) - input logic [63:0] FSrcYE, // 2nd input operand (B) - input logic [3:0] FOpCtrlE, // Function opcode + input logic [2:0] FOpCtrlE, // Function opcode input logic FmtE, // Result Precision (1 for double, 0 for single) + input logic XSgnE, YSgnE, + input logic [10:0] XExpE, YExpE, + input logic [52:0] XManE, YManE, + input logic XDenormE, YDenormE, + input logic XNormE, YNormE, + input logic XZeroE, YZeroE, + input logic XInfE, YInfE, + input logic XNaNE, YNaNE, + input logic XSNaNE, YSNaNE, - output logic [63:0] AddFloat1E, - output logic [63:0] AddFloat2E, output logic [10:0] AddExponentE, output logic [10:0] AddExpPostSumE, output logic [11:0] AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0] @@ -98,11 +111,9 @@ module fpuaddcvt1 ( output logic [3:0] AddSelInvE, output logic AddCorrSignE, output logic AddSignAE, - output logic AddOp1NormE, AddOp2NormE, output logic AddOpANormE, AddOpBNormE, output logic AddInvalidE, output logic AddDenormInE, - output logic AddConvertE, output logic AddSwapE ); @@ -112,7 +123,7 @@ module fpuaddcvt1 ( wire ZV_mantissaB; wire P; - assign P = ~FmtE; + assign P = ~(FmtE^FOpCtrlE[1]); wire [63:0] IntValue; wire [11:0] exp1, exp2; @@ -130,22 +141,15 @@ module fpuaddcvt1 ( wire zeroB; wire [5:0] align_shift; - // Convert the input operands to their appropriate forms based on - // the orignal operands, the FOpCtrlE , and their precision P. - // Single precision inputs are converted to double precision - // and the sign of the first operand is set appropratiately based on - // if the operation is absolute value or negation. - - convert_inputs conv1 (.Float1(AddFloat1E), .Float2(AddFloat2E), .op1(FSrcXE), .op2(FSrcYE), .op_type(FOpCtrlE), .P); - // Test for exceptions and return the "Invalid Operation" and // "Denormalized" Input Flags. The "AddSelInvE" is used in // the third pipeline stage to select the result. Also, AddOp1NormE // and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized. // sub is one if the effective operation is subtaction. - exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, - AddFloat1E, AddFloat2E, FOpCtrlE); + exception exc1 (.Ztype(AddSelInvE), .Invalid(AddInvalidE), .Denorm(AddDenormInE), .Sub(sub), + .XSgnE, .YSgnE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, + .op_type(FOpCtrlE)); // Perform Exponent Subtraction (used for alignment). For performance // both exponent subtractions are performed in parallel. This was @@ -153,25 +157,25 @@ module fpuaddcvt1 ( // the two parallel additions. The input values are zero-extended to 12 // bits prior to performing the addition. - assign exp1 = {1'b0, AddFloat1E[62:52]}; - assign exp2 = {1'b0, AddFloat2E[62:52]}; + assign exp1 = {1'b0, XExpE}; + assign exp2 = {1'b0, YExpE}; assign exp_diff1 = exp1 - exp2; - assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1; + assign exp_diff2 = AddDenormInE ? ({YSgnE, YExpE} - {XSgnE, XExpE}): exp2 - exp1; // The second operand (B) should be set to zero, if FOpCtrlE does not // specify addition or subtraction - assign zeroB = FOpCtrlE[2] | FOpCtrlE[1]; + assign zeroB = FOpCtrlE[1]; // Swapped operands if zeroB is not one and exp1 < exp2. // Swapping causes exp2 to be used for the result exponent. // Only the exponent of the larger operand is used to determine // the final result. assign AddSwapE = exp_diff1[11] & ~zeroB; - assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0]; - assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0]; - assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0]; - assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0]; - assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63]; + assign AddExponentE = AddSwapE ? YExpE : XExpE; + assign AddExpPostSumE = AddSwapE ? YExpE : XExpE; + assign mantissaA = AddSwapE ? YManE[51:0] : XManE[51:0]; + assign mantissaB = AddSwapE ? XManE[51:0] : YManE[51:0]; + assign AddSignAE = AddSwapE ? YSgnE : XSgnE; // Leading-Zero Detector. Determine the size of the shift needed for // normalization. If sum_corrected is all zeros, the exp_valid is @@ -201,8 +205,8 @@ module fpuaddcvt1 ( // and loss of sign information. The two bits to the right of the // original mantissa form the "guard" and "round" bits that are used // to round the result. - assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE; - assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE; + assign AddOpANormE = AddSwapE ? YNormE : XNormE; + assign AddOpBNormE = AddSwapE ? XNormE : YNormE; assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0}; assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0}; @@ -223,19 +227,18 @@ module fpuaddcvt1 ( // and the exponent value is left unchanged. // Under denormalized cases, the exponent before the rounder is set to 1 // if the normal shift value is 11. - assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1]; - assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0})); + assign mantissaA3 = AddDenormInE ? ({12'h0, mantissaA}) : {mantissaA1, 7'h0}; // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six // zeros. - assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}}); - assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB); - assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0); + assign mantissaB3[63:7] = AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}}; + assign mantissaB3[6] = AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB; + assign mantissaB3[5:0] = AddDenormInE ? mantissaB[5:0] : 6'h0; // The sign of the result needs to be corrected if the true // operation is subtraction and the input operands were swapped. - assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE; + assign AddCorrSignE = ~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE; // 64-bit Mantissa Adder/Subtractor cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder @@ -281,31 +284,31 @@ endmodule // fpadd module fpuaddcvt2 ( - input [2:0] FrmM, // Rounding mode - specify values - input [3:0] FOpCtrlM, // Function opcode - input FmtM, // Result Precision (0 for double, 1 for single) - input [63:0] AddSumM, AddSumTcM, - input [63:0] AddFloat1M, - input [63:0] AddFloat2M, - input [11:0] AddExp1DenormM, AddExp2DenormM, - input [10:0] AddExponentM, AddExpPostSumM, - input [3:0] AddSelInvM, - input AddOp1NormM, AddOp2NormM, - input AddOpANormM, AddOpBNormM, - input AddInvalidM, - input AddDenormInM, - input AddSignAM, - input AddCorrSignM, - input AddConvertM, - input AddSwapM, + input logic [2:0] FrmM, // Rounding mode - specify values + input logic [2:0] FOpCtrlM, // Function opcode + input logic FmtM, // Result Precision (0 for double, 1 for single) + input logic [63:0] AddSumM, AddSumTcM, + input logic [11:0] AddExp1DenormM, AddExp2DenormM, + input logic [10:0] AddExponentM, AddExpPostSumM, + input logic [3:0] AddSelInvM, + input logic XSgnM, YSgnM, + input logic [52:0] XManM, YManM, + input logic [10:0] XExpM, YExpM, + input logic XNormM, YNormM, + input logic AddOpANormM, AddOpBNormM, + input logic AddInvalidM, + input logic AddDenormInM, + input logic AddSignAM, + input logic AddCorrSignM, + input logic AddSwapM, - output [63:0] FAddResM, // Result of operation - output [4:0] FAddFlgM // IEEE exception flags + output logic [63:0] FAddResM, // Result of operation + output logic [4:0] FAddFlgM // IEEE exception flags ); wire AddDenormM; // AddDenormM on input or output wire P; - assign P = ~FmtM; + assign P = ~(FmtM^FOpCtrlM[1]); wire [10:0] exp_pre; wire [63:0] Result; @@ -338,15 +341,15 @@ module fpuaddcvt2 ( //cases/conversion cases assign exp_pre = AddDenormInM ? ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0])) - : (AddConvertM ? 11'b10000111100 : AddExponentM); + : AddExponentM; // Finds normal underflow result to determine whether to round final AddExponentM down // Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor - assign Float1_sum_comp = (AddFloat1M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1; - assign Float2_sum_comp = (AddFloat2M[51:0] > AddSumM[51:0]) ? 1'b0 : 1'b1; - assign Float1_sum_tc_comp = (AddFloat1M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1; - assign Float2_sum_tc_comp = (AddFloat2M[51:0] > AddSumTcM[51:0]) ? 1'b0 : 1'b1; + assign Float1_sum_comp = ~(XManM[51:0] > AddSumM[51:0]); + assign Float2_sum_comp = ~(YManM[51:0] > AddSumM[51:0]); + assign Float1_sum_tc_comp = ~(XManM[51:0] > AddSumTcM[51:0]); + assign Float2_sum_tc_comp = ~(YManM[51:0] > AddSumTcM[51:0]); // Determines the correct Float value to compare based on AddSwapM result assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp; @@ -357,16 +360,16 @@ module fpuaddcvt2 ( // If the signs are different and both operands aren't denormalized // the normal underflow bit is needed and therefore updated. - assign normal_underflow = ((AddFloat1M[63] ~^ AddFloat2M[63]) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0; + assign normal_underflow = ((XSgnM ^ YSgnM) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0; // Determine the correct sign of the result - assign sign_corr = ((AddCorrSignM ^ AddSignAM) & ~AddConvertM) ^ AddSumM[63]; + assign sign_corr = (AddCorrSignM ^ AddSignAM) ^ AddSumM[63]; // If the AddSumM is negative, use its two complement instead. // This value has to be 64-bits to correctly handle the // case 10...00 - assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) )) - ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM)); + assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (XSgnM ~^ YSgnM) & FOpCtrlM[0] ) | ((XSgnM ^ YSgnM) & ~FOpCtrlM[0]) )) + ? (AddSumM[63] ? AddSumM : AddSumTcM) : (AddSumM[63] ? AddSumTcM : AddSumM); // Finds normal underflow result to determine whether to round final AddExponentM down //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be @@ -384,7 +387,7 @@ module fpuaddcvt2 ( // be right shifted. It outputs the normalized AddSumM. barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm); - assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm); + assign sum_norm_w_bypass = sum_norm; // Round the mantissa to a 52-bit value, with the leading one // removed. If the result is a single precision number, the actual @@ -397,10 +400,10 @@ module fpuaddcvt2 ( // help in processor reservation station detection of load/stores. In // other words, the processor would like to know ahead of time that // if the result is an exception then don't load or store. - rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid, - AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass, - AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52], - AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM); + rounder round1 (.Result, .DenormIO, .Flags(FlagsIn), .rm(FrmM), .P, .OvEn(AddOvEnM), .UnEn(AddUnEnM), .exp_valid, + .sel_inv(AddSelInvM), .Invalid(AddInvalidM), .DenormIn(AddDenormInM), .Asign(sign_corr), .Aexp(exp_pre), .norm_shift, .A(sum_norm_w_bypass), + .exponent_postsum(AddExpPostSumM), .A_Norm(XNormM), .B_Norm(YNormM), .exp_A_unmodified({XSgnM, XExpM}), .exp_B_unmodified({YSgnM, YExpM}), + .normal_overflow(AddNormOvflowM), .normal_underflow, .swap(AddSwapM), .op_type(FOpCtrlM), .sum(AddSumM)); // Store the final result and the exception flags in registers. assign FAddResM = Result; diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index 6eead441..61a4af0a 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -9,7 +9,7 @@ module fctrl ( output logic FRegWriteD, // FP register write enable output logic FDivStartD, // Start division or squareroot output logic [2:0] FResultSelD, // select result to be written to fp register - output logic [3:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit + output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit output logic [1:0] FResSelD, // select one of the results done in the memory stage output logic [1:0] FIntResSelD, // select the result that will be written to the integer register output logic FmtD, // precision - single-0 double-1 @@ -24,82 +24,82 @@ module fctrl ( case(OpD) // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw - 3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b1_0_000_000_00_00_0_0; // flw + 3'b011: ControlsD = `FCTRLW'b1_0_000_001_00_00_0_0; // fld + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw - 3'b011: ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b0_0_000_010_00_00_0_0; // fsw + 3'b011: ControlsD = `FCTRLW'b0_0_000_011_00_00_0_0; // fsd + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase - 7'b1000011: ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd - 7'b1000111: ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub - 7'b1001011: ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub - 7'b1001111: ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd + 7'b1000011: ControlsD = `FCTRLW'b1_0_001_000_00_00_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_001_001_00_00_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_001_010_00_00_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_001_011_00_00_0_0; // fnmadd 7'b1010011: casez(Funct7D) - 7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd - 7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub - 7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul - 7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv - 7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt + 7'b00000??: ControlsD = `FCTRLW'b1_0_010_000_00_00_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_010_001_00_00_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_001_100_00_00_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_011_000_00_00_1_0; // fdiv + 7'b01011??: ControlsD = `FCTRLW'b1_0_011_001_00_00_1_0; // fsqrt 7'b00100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj - 3'b001: ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn - 3'b010: ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 3'b000: ControlsD = `FCTRLW'b1_0_100_000_01_00_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_100_001_01_00_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_100_010_01_00_0_0; // fsgnjx + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase 7'b00101??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin - 3'b001: ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 3'b000: ControlsD = `FCTRLW'b1_0_100_111_10_00_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_100_101_10_00_0_0; // fmax + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase 7'b10100??: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq - 3'b001: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt - 3'b000: ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 3'b010: ControlsD = `FCTRLW'b0_1_100_010_00_00_0_0; // feq + 3'b001: ControlsD = `FCTRLW'b0_1_100_001_00_00_0_0; // flt + 3'b000: ControlsD = `FCTRLW'b0_1_100_011_00_00_0_0; // fle + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase 7'b11100??: if (Funct3D == 3'b001) - ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass - else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w - else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d - else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + ControlsD = `FCTRLW'b0_1_100_000_00_10_0_0; // fclass + else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_100_00_01_0_0; // fmv.x.w + else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_101_00_01_0_0; // fmv.x.d + else ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction 7'b1101000: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.s.w - 2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.s.wu - 2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.s.l - 2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.s.lu + 2'b00: ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.s.w + 2'b01: ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.s.wu + 2'b10: ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.s.l + 2'b11: ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.s.lu default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction endcase 7'b1100000: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.s - 2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.s - 2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.s - 2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.s - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 2'b00: ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.s + 2'b01: ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.s + 2'b10: ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.s + 2'b11: ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.s + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase - 7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x - 7'b0100000: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.s.d + 7'b1111000: ControlsD = `FCTRLW'b1_0_100_000_00_00_0_0; // fmv.w.x + 7'b0100000: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.s.d 7'b1101001: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b1_0_100_0001_11_00_0_0; // fcvt.d.w - 2'b01: ControlsD = `FCTRLW'b1_0_100_0101_11_00_0_0; // fcvt.d.wu - 2'b10: ControlsD = `FCTRLW'b1_0_100_1001_11_00_0_0; // fcvt.d.l - 2'b11: ControlsD = `FCTRLW'b1_0_100_1101_11_00_0_0; // fcvt.d.lu - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 2'b00: ControlsD = `FCTRLW'b1_0_100_000_11_00_0_0; // fcvt.d.w + 2'b01: ControlsD = `FCTRLW'b1_0_100_010_11_00_0_0; // fcvt.d.wu + 2'b10: ControlsD = `FCTRLW'b1_0_100_100_11_00_0_0; // fcvt.d.l + 2'b11: ControlsD = `FCTRLW'b1_0_100_110_11_00_0_0; // fcvt.d.lu + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase 7'b1100001: case(Rs2D[1:0]) - 2'b00: ControlsD = `FCTRLW'b0_1_100_0010_11_11_0_0; // fcvt.w.d - 2'b01: ControlsD = `FCTRLW'b0_1_100_0110_11_11_0_0; // fcvt.wu.d - 2'b10: ControlsD = `FCTRLW'b0_1_100_1010_11_11_0_0; // fcvt.l.d - 2'b11: ControlsD = `FCTRLW'b0_1_100_1110_11_11_0_0; // fcvt.lu.d - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 2'b00: ControlsD = `FCTRLW'b0_1_100_001_11_11_0_0; // fcvt.w.d + 2'b01: ControlsD = `FCTRLW'b0_1_100_011_11_11_0_0; // fcvt.wu.d + 2'b10: ControlsD = `FCTRLW'b0_1_100_101_11_11_0_0; // fcvt.l.d + 2'b11: ControlsD = `FCTRLW'b0_1_100_111_11_11_0_0; // fcvt.lu.d + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase - 7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x - 7'b0100001: ControlsD = `FCTRLW'b1_0_010_0111_00_00_0_0; // fcvt.d.s - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 7'b1111001: ControlsD = `FCTRLW'b1_0_100_001_00_00_0_0; // fmv.d.x + 7'b0100001: ControlsD = `FCTRLW'b1_0_010_111_00_00_0_0; // fcvt.d.s + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase - default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + default: ControlsD = `FCTRLW'b0_0_000_000_00_00_0_1; // non-implemented instruction endcase // unswizzle control bits @@ -117,7 +117,7 @@ module fctrl ( // Precision // 0-single // 1-double - assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0]; + assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : FResultSelD == 3'b010 ? Funct7D[0]^FOpCtrlD[1] : OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0]; // FResultSel: // 000 - ReadRes - load diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv index a8f845a6..17da8030 100644 --- a/wally-pipelined/src/fpu/fcvt.sv +++ b/wally-pipelined/src/fpu/fcvt.sv @@ -11,7 +11,7 @@ module fcvt ( input logic XDenormE, // is X denormalized input logic [10:0] BiasE, // bias - depends on precision (max exponent/2) input logic [`XLEN-1:0] SrcAE, // integer input - input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below) + input logic [2:0] FOpCtrlE, // chooses which instruction is done (full list below) input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic FmtE, // precision 1 = double 0 = single output logic [63:0] CvtResE, // convert final result @@ -43,27 +43,27 @@ module fcvt ( logic RoundSgn; // sign of the rounded result // FOpCtrlE: - // fcvt.w.s = 0010 - // fcvt.wu.s = 0110 - // fcvt.s.w = 0001 - // fcvt.s.wu = 0101 - // fcvt.l.s = 1010 - // fcvt.lu.s = 1110 - // fcvt.s.l = 1001 - // fcvt.s.lu = 1101 - // fcvt.w.d = 0010 - // fcvt.wu.d = 0110 - // fcvt.d.w = 0001 - // fcvt.d.wu = 0101 - // fcvt.l.d = 1010 - // fcvt.lu.d = 1110 - // fcvt.d.l = 1001 - // fcvt.d.lu = 1101 + // fcvt.w.s = 001 + // fcvt.wu.s = 011 + // fcvt.s.w = 000 + // fcvt.s.wu = 010 + // fcvt.l.s = 101 + // fcvt.lu.s = 111 + // fcvt.s.l = 100 + // fcvt.s.lu = 110 + // fcvt.w.d = 001 + // fcvt.wu.d = 011 + // fcvt.d.w = 000 + // fcvt.d.wu = 010 + // fcvt.l.d = 101 + // fcvt.lu.d = 111 + // fcvt.d.l = 100 + // fcvt.d.lu = 110 // {long, unsigned, to int, from int} // calculate signals based off the input and output's size - assign Res64 = (FOpCtrlE[1]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[0]); - assign In64 = (FOpCtrlE[0]&FOpCtrlE[3]) | (FmtE&FOpCtrlE[1]); + assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]); + assign In64 = (~FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&FOpCtrlE[0]); assign SubBits = In64 ? 8'd64 : 8'd32; assign Bits = Res64 ? 8'd64 : 8'd32; @@ -73,11 +73,11 @@ module fcvt ( //////////////////////////////////////////////////////// // position the input in the most significant bits - assign IntIn = FOpCtrlE[3] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0}; + assign IntIn = FOpCtrlE[2] ? {SrcAE, {64-`XLEN{1'b0}}} : {SrcAE[31:0], 32'b0}; // make the integer positive - assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn; + assign PosInt = IntIn[64-1]&~FOpCtrlE[1] ? -IntIn : IntIn; // determine the integer's sign - assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0; + assign ResSgn = ~FOpCtrlE[1] ? IntIn[64-1] : 1'b0; // Leading one detector logic [8:0] i; @@ -97,8 +97,8 @@ module fcvt ( // select the shift value and amount based on operation (to fp or int) - assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP; - assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XManE} : {PosInt, 52'b0}; + assign ShiftCnt = FOpCtrlE[0] ? ExpVal : LZResP; + assign ShiftVal = FOpCtrlE[0] ? {{64-2{1'b0}}, XManE} : {PosInt, 52'b0}; // if shift = -1 then shift one bit right for gaurd bit (right shifting twice never rounds) // if the shift is negitive add a bit for sticky bit calculation @@ -111,35 +111,35 @@ module fcvt ( // calculate sticky bit // - take into account the possible right shift from before // - the sticky bit calculation covers three diffrent sizes depending on the opperation - assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); + assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (~FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (~FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); // determine guard, round, and least significant bit of the result - assign Guard = FOpCtrlE[1] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42]; - assign Round = FOpCtrlE[1] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41]; - assign LSB = FOpCtrlE[1] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43]; + assign Guard = FOpCtrlE[0] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42]; + assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41]; + assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43]; always_comb begin // Determine if you add 1 case (FrmE) 3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down - 3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up + 3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[0]) | (ResSgn&~FOpCtrlE[0]);//round down + 3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[0]) | (~ResSgn&~FOpCtrlE[0]);//round up 3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase end // dont tound if the result is exact - assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]); + assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[0]); // round the shifted mantissa assign RoundedTmp = ShiftedMan[64+1:2] + Plus1; assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ; // fit the rounded result into the appropriate size and take the 2's complement if needed - assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : + assign Rounded = Res64 ? XSgnE&FOpCtrlE[0] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]}; // extract the MSB and Sign for later use (will be used to determine underflow and overflow) @@ -148,29 +148,29 @@ module fcvt ( // check if the result overflows - assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE; + assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[1]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE; // check if the result underflows (this calculation changes if the result is signed or unsigned) - assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded; + assign Uf = FOpCtrlE[1] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded; // calculate the result's sign - assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1]; + assign SgnRes = ~FOpCtrlE[2] & FOpCtrlE[0]; // select the integer result - assign CvtIntRes = Of ? FOpCtrlE[2] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : - Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} : + assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : + Uf ? FOpCtrlE[1] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} : Rounded[64-1:0]; // select the floating point result assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]}; // select the result - assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes; + assign CvtResE = ~FOpCtrlE[0] ? CvtFPRes : CvtIntRes; // calculate the flags // - to int only sets the invalid flag // - from int only sets the inexact flag - assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]}; + assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]}; diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 0601db06..f651d237 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -23,7 +23,7 @@ /////////////////////////////////////////// `include "wally-config.vh" -// `include "../../../config/rv64icfd/wally-config.vh" +// `include "../../../config/rv64icfd/wally-config.vh" module fma( input logic clk, @@ -106,6 +106,7 @@ module fma1( logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [`NE-2:0] Denorm; // Denormalized input value /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -116,8 +117,9 @@ module fma1( /////////////////////////////////////////////////////////////////////////////// // verilator lint_off WIDTH + assign Denorm = FmtE ? 1 : -126+1023; assign ProdExpE = (XZeroE|YZeroE) ? 0 : - XExpE + YExpE - BiasE + XDenormE + YDenormE; + XExpE + YExpE - BiasE + ({`NE-1{XDenormE}}&Denorm) + ({`NE-1{YDenormE}}&Denorm); // verilator lint_on WIDTH // Calculate the product's mantissa @@ -133,7 +135,7 @@ module fma1( // - positive means the product is larger, so shift Z right // - Denormal numbers have an an exponent value of 1, however they are // represented with an exponent of 0. add one to the exponent if it is a denormal number - assign AlignCnt = ProdExpE - ZExpE - ZDenormE; + assign AlignCnt = ProdExpE - (ZExpE + ({`NE-1{ZDenormE}}&Denorm)); // Defualt Addition without shifting // | 54'b0 | 106'b(product) | 2'b0 | @@ -320,7 +322,9 @@ module fma2( //assign FracLen = `NF; // Determine if the result is denormal - assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4)); + logic [`NE+1:0] SumExpTmpTmp; + assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4)); + assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; @@ -511,7 +515,7 @@ module fma2( ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; - assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + (CalcPlus1&(AddendStickyM|FrmM[1])) : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv index 19ef41b9..a2534149 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -75,15 +75,8 @@ module fpdiv ( // div/sqrt // fdiv = 0 // fsqrt = 1 - - // Convert the input operands to their appropriate forms based on - // the orignal operands, the op_type , and their precision P. - // Single precision inputs are converted to double precision - // and the sign of the first operand is set appropratiately based on - // if the operation is absolute value or negation. - convert_inputs_div conv1 (.op1, .op2, .op_type, .P, - // outputs: - .Float1, .Float2b(Float2)); + assign Float1 = op1; + assign Float2 = op_type ? op1 : op2; // Test for exceptions and return the "Invalid Operation" and // "Denormalized" Input Flags. The "sel_inv" is used in diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 4e7d898e..04823580 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -57,7 +57,7 @@ module fpu ( // single stored in a double: | 32 1s | single precision value | // - sets the underflow after rounding - generate if (`F_SUPPORTED | `D_SUPPORTED) begin + generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu // control signals logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable @@ -67,7 +67,7 @@ module fpu ( logic FWriteIntD; // Write to integer register logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input @@ -97,7 +97,8 @@ module fpu ( logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE; // is X normal + logic XNormE,YNormE; // is normal + logic XNormM,YNormM; // is normal // result and flag signals @@ -171,7 +172,7 @@ module fpu ( flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); - flopenrc #(18) DECtrlReg3(clk, reset, FlushE, ~StallE, + flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); @@ -203,11 +204,11 @@ module fpu ( // unpacking unit // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, + unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FResultSelE, .FmtE, // outputs: .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE, .YNormE); // FMA // - two stage FMA @@ -222,7 +223,7 @@ module fpu ( .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), + .FOpCtrlE, .FOpCtrlM, .FmtE, .FmtM, .FrmM, // outputs: .FMAFlgM, .FMAResM); @@ -240,10 +241,10 @@ module fpu ( // - if not captured any forwarded inputs will change durring computation // - this problem is caused by stalling the execute stage // - the other units don't have this problem, only div/sqrt stalls the execute stage - flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), + flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), .en(1'b1), .clear(FDivSqrtDoneE), .reset(reset), .clk(FDivBusyE)); - flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), + flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), .en(1'b1), .clear(FDivSqrtDoneE), .reset(reset), .clk(FDivBusyE)); @@ -261,6 +262,8 @@ module fpu ( //*** remove uneeded logic //*** change to use the unpacking unit if possible faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, + .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM, + .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XNormM, .YNormM, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, // outputs: .FAddResM, .FAddFlgM); @@ -269,7 +272,7 @@ module fpu ( // - writes to FP file durring min/max instructions // - other comparisons write a 1 or 0 to the integer register fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), - .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), + .FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, // outputs: .Invalid(CmpNVE), .CmpResE); @@ -325,9 +328,9 @@ module fpu ( flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); + flopenrc #(18) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE}, + {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM}); diff --git a/wally-pipelined/src/fpu/rounder_denorm.sv b/wally-pipelined/src/fpu/rounder_denorm.sv index 2e1ad07e..3c9a0e91 100755 --- a/wally-pipelined/src/fpu/rounder_denorm.sv +++ b/wally-pipelined/src/fpu/rounder_denorm.sv @@ -1,4 +1,4 @@ -// The rounder takes as inputs a 64-bit value to be rounded, A, the +// The rounder takes as input logics a 64-bit value to be rounded, A, the // exponent of the value to be rounded, the sign of the final result, Sign, // the precision of the results, P, and the two-bit rounding mode, rm. // It produces a rounded 52-bit result, Z, the exponent of the rounded @@ -17,38 +17,34 @@ // where , denotes the rounding boundary. S is the logical OR of all the // bits to the right of R. -module rounder (Result, DenormIO, Flags, rm, P, OvEn, - UnEn, exp_valid, sel_inv, Invalid, DenormIn, convert, Asign, Aexp, - norm_shift, A, exponent_postsum, A_Norm, B_Norm, exp_A_unmodified, exp_B_unmodified, - normal_overflow, normal_underflow, swap, op_type, sum); - - input [2:0] rm; - input P; - input OvEn; - input UnEn; - input exp_valid; - input [3:0] sel_inv; - input Invalid; - input DenormIn; - input convert; - input Asign; - input [10:0] Aexp; - input [5:0] norm_shift; - input [63:0] A; - input [10:0] exponent_postsum; - input A_Norm; - input B_Norm; - input [11:0] exp_A_unmodified; - input [11:0] exp_B_unmodified; - input normal_overflow; - input normal_underflow; - input swap; - input [3:0] op_type; - input [63:0] sum; +module rounder ( + input logic [2:0] rm, + input logic P, + input logic OvEn, + input logic UnEn, + input logic exp_valid, + input logic [3:0] sel_inv, + input logic Invalid, + input logic DenormIn, + input logic Asign, + input logic [10:0] Aexp, + input logic [5:0] norm_shift, + input logic [63:0] A, + input logic [10:0] exponent_postsum, + input logic A_Norm, + input logic B_Norm, + input logic [11:0] exp_A_unmodified, + input logic [11:0] exp_B_unmodified, + input logic normal_overflow, + input logic normal_underflow, + input logic swap, + input logic [2:0] op_type, + input logic [63:0] sum, - output [63:0] Result; - output DenormIO; - output [4:0] Flags; + output logic [63:0] Result, + output logic DenormIO, + output logic [4:0] Flags +); wire Rsign; wire Sticky_out; @@ -87,7 +83,6 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, wire Cout_overflow; wire Texp_l7z; wire Texp_l7o; - wire OvCon; // Determine the sticky bits for double and single precision assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0]; @@ -152,7 +147,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z)); // Set the overflow and underflow flags. They should not be set if - // the input was infinite or NaN or the output of the adder is zero. + // the input logic was infinite or NaN or the output logic of the adder is zero. // 00 = Valid // 10 = NaN assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]); @@ -164,7 +159,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, assign OverFlow = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid; // The DenormIO is set if underflow has occurred or if their was a - // denormalized input. + // denormalized input logic. assign DenormIO = DenormIn | UnderFlow; // The final result is Inexact if any rounding occurred ((i.e., R or S @@ -192,7 +187,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, // -0 + +0 = -0 (for RD) assign Rzero = ~exp_valid | UnderFlow; assign Rsign = DenormIn ? - ( ~(op_type[2] | op_type[1] | op_type[0]) ? + ( ~(op_type[1] | op_type[0]) ? ( (sum[63] & (A_Norm | B_Norm) & (exp_A_unmodified[11] ^ exp_B_unmodified[11])) ? ~Asign : Asign) : ( ((A_Norm ^ B_Norm) & (exp_A_unmodified[11] ~^ exp_B_unmodified[11])) ? @@ -202,7 +197,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, (sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] | sel_inv[2]&sel_inv[1]&~sel_inv[0] | ~exp_valid&rm[1]&rm[0]&~sel_inv[2] | - UnderFlow&rm[1]&rm[0]) & ~convert) & ~sel_inv[3]) | + UnderFlow&rm[1]&rm[0])) & ~sel_inv[3]) | (Asign & sel_inv[3]) ); // The exponent of the final result is zero if the final result is @@ -218,7 +213,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, assign VeryLarge = OverFlow & ~OvEn; assign Infinite = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]); assign Largest = VeryLarge & Round_zero; - assign Adj_exp = OverFlow & OvEn & ~convert; + assign Adj_exp = OverFlow & OvEn; assign Rexp[10:1] = ({10{~Valid}} | {Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | @@ -230,7 +225,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, // Depending on the operation and the signs of the orignal operands, // underflow may or may not be needed to round. assign Rexp_denorm = DenormIn ? - ((~op_type[2] & ~op_type[1] & op_type[0]) ? + ((~op_type[1] & op_type[0]) ? ( ((A_Norm != B_Norm) & (exp_A_unmodified[11] == exp_B_unmodified[11])) ? ( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) : ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) @@ -238,7 +233,7 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, ( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) : ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) ) : - (op_type[3]) ? exp_A_unmodified[10:0] : Rexp; //KEP used to be all of exp_A_unmodified + Rexp; //KEP used to be all of exp_A_unmodified // If the result is zero or infinity, the mantissa is all zeros. // If the result is NaN, the mantissa is 10...0 @@ -256,10 +251,9 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, // for the final result. A double precision result is returned if // overflow has occurred, the overflow trap is enabled, and a conversion // is being performed. - assign OvCon = OverFlow & OvEn & convert; - assign Result = (op_type[3]) ? {A[63:0]} : (DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : ((P&~OvCon) ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} - : {Rsign, Rexp, Rmant})); + assign Result = DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : (P ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} + : {Rsign, Rexp, Rmant}); endmodule // rounder diff --git a/wally-pipelined/src/fpu/unpacking.sv b/wally-pipelined/src/fpu/unpacking.sv index f1f595de..3913b06b 100644 --- a/wally-pipelined/src/fpu/unpacking.sv +++ b/wally-pipelined/src/fpu/unpacking.sv @@ -1,11 +1,12 @@ module unpacking ( input logic [63:0] X, Y, Z, input logic FmtE, + input logic [2:0] FResultSelE, input logic [2:0] FOpCtrlE, output logic XSgnE, YSgnE, ZSgnE, output logic [10:0] XExpE, YExpE, ZExpE, output logic [52:0] XManE, YManE, ZManE, - output logic XNormE, + output logic XNormE, YNormE, output logic XNaNE, YNaNE, ZNaNE, output logic XSNaNE, YSNaNE, ZSNaNE, output logic XDenormE, YDenormE, ZDenormE, @@ -25,12 +26,9 @@ module unpacking ( assign YSgnE = FmtE ? Y[63] : Y[31]; assign ZSgnE = FmtE ? Z[63] : Z[31]; - assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]};//{X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; - assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]};//{Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; - assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};//{Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]}; -/* assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; // *** maybe convert to full number of bits here? - assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]}; - assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};*/ + assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; + assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; + assign ZExpE = FmtE ? Z[62:52] : {Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]}; assign XFracE = FmtE ? X[51:0] : {X[22:0], 29'b0}; assign YFracE = FmtE ? Y[51:0] : {Y[22:0], 29'b0}; @@ -57,6 +55,7 @@ module unpacking ( assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23]; assign XNormE = ~(XExpMaxE|XExpZero); + assign YNormE = ~YExpZero; // only used in addcvt - checks inf and NaN seperately assign XNaNE = XExpMaxE & ~XFracZero; assign YNaNE = YExpMaxE & ~YFracZero; @@ -78,7 +77,6 @@ module unpacking ( assign YZeroE = YExpZero & YFracZero; assign ZZeroE = ZExpZero & ZFracZero; - assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed? - // assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision + assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision endmodule \ No newline at end of file