diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh index 1a7df3c4..e5ccc0bf 100644 --- a/wally-pipelined/config/rv64icfd/wally-config.vh +++ b/wally-pipelined/config/rv64icfd/wally-config.vh @@ -34,7 +34,7 @@ `define XLEN 64 // MISA RISC-V configuration per specification -`define MISA (32'h00000104 | 0 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) +`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) `define ZCSR_SUPPORTED 1 `define COUNTERS 32 `define ZCOUNTERS_SUPPORTED 1 diff --git a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv index 9a91bfdd..48dc16da 100644 --- a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv +++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv @@ -45,7 +45,7 @@ assign FOpCtrlE = 3'b0; // down - 010 // up - 011 // nearest max mag - 100 -assign FrmE = 3'b000; +assign FrmE = 3'b010; assign FmtE = 1'b1; @@ -55,8 +55,8 @@ assign ynan = FmtE ? &FInput2E[62:52] && |FInput2E[51:0] : &FInput2E[62:55] && | assign znan = FmtE ? &FInput3E[62:52] && |FInput3E[51:0] : &FInput3E[62:55] && |FInput3E[54:32]; assign ansnan = FmtE ? 
&ans[62:52] && |ans[51:0] : &ans[62:55] && |ans[54:32]; // instantiate device under test -fma1 UUT1(.*); -fma2 UUT2(.FInput1M(FInput1E), .FInput2M(FInput2E), .FInput3M(FInput3E), .FrmM(FrmE), .ProdManM(ProdManE), +fma1 UUT1(.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .*); +fma2 UUT2(.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FrmM(FrmE), .ProdManM(ProdManE), .AlignedAddendM(AlignedAddendE), .ProdExpM(ProdExpE), .AddendStickyM(AddendStickyE),.KillProdM(KillProdE), .FOpCtrlM(FOpCtrlE), .XZeroM(XZeroE),.YZeroM(YZeroE),.ZZeroM(ZZeroE),.XInfM(XInfE),.YInfM(YInfE),.ZInfM(ZInfE),.XNaNM(XNaNE),.YNaNM(YNaNE),.ZNaNM(ZNaNE), .FmtM(FmtE), .*); @@ -110,7 +110,7 @@ always @(posedge clk) if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN "); if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN "); errors = errors + 1; - if (errors == 40) + // if (errors == 40) $stop; end if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin @@ -131,7 +131,7 @@ always @(posedge clk) if(&ans[62:55] && |ans[54:32] && ~ans[54] ) $display( "ans=sigNaN "); if(&ans[62:55] && |ans[54:32] && ans[54]) $display( "ans=qutNaN "); errors = errors + 1; - if (errors == 10) + //if (errors == 10) $stop; end vectornum = vectornum + 1; @@ -140,4 +140,4 @@ always @(posedge clk) $stop; end end -endmodule \ No newline at end of file +endmodule diff --git a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh index 0741e9d6..a8dd70b8 100755 --- a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh +++ b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh @@ -1,3 +1,3 @@ -testfloat_gen f64_mulAdd -tininessafter -n 6133248 
-rnear_even -seed 113355 -level 1 > testFloat +testfloat_gen f64_mulAdd -tininessbefore -n 6133248 -rmin -seed 113355 -level 1 > testFloat tr -d ' ' < testFloat > testFloatNoSpace diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index 94143b87..5749d0db 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -19,8 +19,6 @@ module fctrl ( logic IllegalFPUInstr1D, IllegalFPUInstr2D; - //precision is taken directly from instruction - assign FmtD = Funct7D[0]; // *** fix rounding for dynamic rounding assign FrmD = &Funct3D ? FRM_REGW : Funct3D; @@ -211,6 +209,9 @@ module fctrl ( endcase end + //precision + assign FmtD = (~&FResultSelD & Funct7D[0]) | (&FResultSelD & FOpCtrlD[0]); + assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D; //write to integer source if conv to int occurs //AND of Funct7 for int results diff --git a/wally-pipelined/src/fpu/fma1.sv b/wally-pipelined/src/fpu/fma1.sv index dd2cc585..ab9d2bb1 100644 --- a/wally-pipelined/src/fpu/fma1.sv +++ b/wally-pipelined/src/fpu/fma1.sv @@ -1,8 +1,8 @@ module fma1( - input logic [63:0] FInput1E, // X - input logic [63:0] FInput2E, // Y - input logic [63:0] FInput3E, // Z + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtE, // precision 1 = double 0 = single output logic [105:0] ProdManE, // 1.X frac * 1.Y frac @@ -21,8 +21,8 @@ module fma1( logic [12:0] AlignCnt; // how far to shift the addend to align with the product logic [211:0] ZManShifted; // output of the alignment shifter including sticky bit logic [211:0] ZManPreShifted; // input to the alignment shifter - logic XDenormE, YDenormE, ZDenormE; // inputs are denormal - logic [63:0] FInput3E2; // value to add (Z or zero) + logic XDenorm, YDenorm, ZDenorm; // inputs are denormal + logic [63:0] 
Addend; // value to add (Z or zero) logic [12:0] Bias; // 1023 for double, 127 for single logic XExpZero, YExpZero, ZExpZero; // input exponent zero logic XFracZero, YFracZero, ZFracZero; // input fraction zero @@ -34,19 +34,19 @@ module fma1( /////////////////////////////////////////////////////////////////////////////// // Set addend to zero if FMUL instruction - assign FInput3E2 = FOpCtrlE[2] ? 64'b0 : FInput3E; + assign Addend = FOpCtrlE[2] ? 64'b0 : Z; - assign XSgn = FInput1E[63]; - assign YSgn = FInput2E[63]; - assign ZSgn = FInput3E2[63]; + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]; - assign XExp = FmtE ? {2'b0, FInput1E[62:52]} : {5'b0, FInput1E[62:55]}; - assign YExp = FmtE ? {2'b0, FInput2E[62:52]} : {5'b0, FInput2E[62:55]}; - assign ZExp = FmtE ? {2'b0, FInput3E2[62:52]} : {5'b0, FInput3E2[62:55]}; + assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; + assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; + assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; - assign XFrac = FmtE ? FInput1E[51:0] : {FInput1E[54:32], 29'b0}; - assign YFrac = FmtE ? FInput2E[51:0] : {FInput2E[54:32], 29'b0}; - assign ZFrac = FmtE ? FInput3E2[51:0] : {FInput3E2[54:32], 29'b0}; + assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; + assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; + assign ZFrac = FmtE ? 
Addend[51:0] : {Addend[54:32], 29'b0}; assign XMan = {~XExpZero, XFrac}; assign YMan = {~YExpZero, YFrac}; @@ -76,9 +76,9 @@ module fma1( assign YNaNE = YExpMax & ~YFracZero; assign ZNaNE = ZExpMax & ~ZFracZero; - assign XDenormE = XExpZero & ~XFracZero; - assign YDenormE = YExpZero & ~YFracZero; - assign ZDenormE = ZExpZero & ~ZFracZero; + assign XDenorm = XExpZero & ~XFracZero; + assign YDenorm = YExpZero & ~YFracZero; + assign ZDenorm = ZExpZero & ~ZFracZero; assign XInfE = XExpMax & XFracZero; assign YInfE = YExpMax & YFracZero; @@ -101,7 +101,7 @@ module fma1( // verilator lint_off WIDTH assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : - XExp + YExp - Bias + XDenormE + YDenormE; + XExp + YExp - Bias + XDenorm + YDenorm; // Calculate the product's mantissa // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. @@ -124,7 +124,7 @@ module fma1( // - positive means the product is larger, so shift Z right // - Denormal numbers have an an exponent value of 1, however they are // represented with an exponent of 0. 
add one to the exponent if it is a denormal number - assign AlignCnt = ProdExpE - ZExp - ZDenormE; + assign AlignCnt = ProdExpE - ZExp - ZDenorm; // verilator lint_on WIDTH diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 8d12431a..89a059dc 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -1,8 +1,8 @@ module fma2( - input logic [63:0] FInput1M, // X - input logic [63:0] FInput2M, // Y - input logic [63:0] FInput3M, // Z + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtM, // precision 1 = double 0 = single @@ -32,7 +32,7 @@ module fma2( logic [12:0] SumExp; // exponent of the normalized sum logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 - logic [12:0] ResultExpTmp; // ResultExp with bits to determine sign and overflow + logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow logic [53:0] NormSum; // normalized sum logic [161:0] SumShifted; // sum shifted for normalization logic [8:0] NormCnt; // output of the leading zero detector @@ -42,17 +42,18 @@ module fma2( logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) logic ResultDenorm; // is the result denormalized logic Sticky; // Sticky bit - logic Plus1, Minus1, Plus1Tmp, Minus1Tmp; // do you add or subtract one for rounding + logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding logic Invalid,Underflow,Overflow,Inexact; // flags logic [8:0] DenormShift; // right shift if the result is 
denormalized logic SubBySmallNum; // was there supposed to be a subtraction by a small number - logic [63:0] FInput3M2; // value to add (Z or zero) + logic [63:0] Addend; // value to add (Z or zero) logic ZeroSgn; // the result's sign if the sum is zero logic ResultSgnTmp; // the result's sign assuming the result is not zero logic Guard, Round, LSBNormSum; // bits needed to determine rounding logic [12:0] MaxExp; // maximum value of the exponent logic [12:0] FracLen; // length of the fraction logic SigNaN; // is an input a signaling NaN + logic UnderflowFlag; // Underflow signal used in FmaFlagsM (used to avoid a circular dependency) logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results @@ -62,15 +63,15 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// // Set addend to zero if FMUL instruction - assign FInput3M2 = FOpCtrlM[2] ? 64'b0 : FInput3M; + assign Addend = FOpCtrlM[2] ? 64'b0 : Z; // split inputs into the sign bit, and exponent to handle single or double precision // - single precision is in the top half of the inputs - assign XSgn = FInput1M[63]; - assign YSgn = FInput2M[63]; - assign ZSgn = FInput3M2[63]^FOpCtrlM[0]; //Negate Z if subtraction + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction - assign ZExp = FmtM ? FInput3M2[62:52] : {3'b0, FInput3M2[62:55]}; + assign ZExp = FmtM ? 
Addend[62:52] : {3'b0, Addend[62:55]}; @@ -207,28 +208,28 @@ module fma2( always_comb begin // Determine if you add 1 case (FrmM) - 3'b000: Plus1Tmp = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even - 3'b001: Plus1Tmp = 0;//round to zero - 3'b010: Plus1Tmp = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: Plus1Tmp = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up - 3'b100: Plus1Tmp = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude - default: Plus1Tmp = 1'bx; + 3'b000: CalcPlus1 = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even + 3'b001: CalcPlus1 = 0;//round to zero + 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up + 3'b100: CalcPlus1 = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude + default: CalcPlus1 = 1'bx; endcase // Determine if you subtract 1 case (FrmM) - 3'b000: Minus1Tmp = 0;//round to nearest even - 3'b001: Minus1Tmp = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: Minus1Tmp = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: Minus1Tmp = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up - 3'b100: Minus1Tmp = 0;//round to nearest max magnitude - default: Minus1Tmp = 1'bx; + 3'b000: CalcMinus1 = 0;//round to nearest even + 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero + 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up + 3'b100: CalcMinus1 = 0;//round to nearest max magnitude + default: CalcMinus1 = 1'bx; endcase end // If an answer is exact don't round - assign Plus1 
= Plus1Tmp & (Sticky | Guard | Round); - assign Minus1 = Minus1Tmp & (Sticky | Guard | Round); + assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); + assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); // Compute rounded result logic [64:0] RoundAdd; @@ -237,8 +238,8 @@ module fma2( Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; assign NormSumTruncated = FmtM ? NormSum[53:2] : {NormSum[53:31], 29'b0}; - assign {ResultExpTmp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; - assign ResultExp = ResultExpTmp[10:0]; + assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; + assign ResultExp = FullResultExp[10:0]; @@ -277,27 +278,27 @@ module fma2( // 2) 0 * Inf // 3) any input is a signaling NaN assign MaxExp = FmtM ? 13'd2047 : 13'd255; - assign SigNaN = FmtM ? (XNaNM&~FInput1M[51]) | (YNaNM&~FInput2M[51]) | (ZNaNM&~FInput3M2[51]) : - (XNaNM&~FInput1M[54]) | (YNaNM&~FInput2M[54]) | (ZNaNM&~FInput3M2[54]); - assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (XSgn ^ YSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); + assign SigNaN = FmtM ? 
(XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : + (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); + assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); // Set Overflow flag if the number is too big to be represented // - Don't set the overflow flag if an overflowed result isn't outputed - assign Overflow = ResultExpTmp >= MaxExp & ~ResultExpTmp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Set Underflow flag if the number is too small to be represented in normal numbers - logic ProdUf; - assign ProdUf = ProdExpM <= 1; - // assign Underflow = ResultExpTmp[12] | (KillProdM&AddendStickyM&ZZeroM) | (~(|ResultExpTmp)&ResultDenorm&(Round|Guard|Sticky)) | Plus1&ResultDenorm&(ResultExp == 1); + // - Don't set the underflow flag if the result is exact assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)) )&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = Underflow | (FullResultExp == 0)&Minus1; // before rounding option + // assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); //after rounding option // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed result isn't outputed assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Combine flags // - FMA can't set the Divide by zero flag - // - Don't set the underflow flag if the result is exact - assign FmaFlagsM = {Invalid, 1'b0, Overflow, Underflow & ~(ResultExpTmp == 1), Inexact}; + // - Don't set the underflow flag if the result was rounded up to a normal number + assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; @@ -308,23 +309,23 @@ module fma2( 
/////////////////////////////////////////////////////////////////////////////// // Select the result /////////////////////////////////////////////////////////////////////////////// - assign XNaNResult = FmtM ? {XSgn, FInput1M[62:52], 1'b1,FInput1M[50:0]} : {XSgn, FInput1M[62:55], 1'b1,FInput1M[53:0]}; - assign YNaNResult = FmtM ? {YSgn, FInput2M[62:52], 1'b1,FInput2M[50:0]} : {YSgn, FInput2M[62:55], 1'b1,FInput2M[53:0]}; - assign ZNaNResult = FmtM ? {ZSgn, FInput3M2[62:52], 1'b1,FInput3M2[50:0]} : {ZSgn, FInput3M2[62:55], 1'b1,FInput3M2[53:0]}; + assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; + assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; + assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} : {ResultSgn, 11'h7ff, 52'b0} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : {ResultSgn, 8'hff, 55'b0}; assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; - assign KillProdResult = FmtM ?{ResultSgn, FInput3M2[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, FInput3M2[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; - assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (Plus1Tmp&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (Plus1Tmp&(AddendStickyM|FrmM[1]))}, 32'b0}; + assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; + assign UnderflowResult = FmtM ? 
{ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; assign FmaResultM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : ZNaNM ? ZNaNResult : Invalid ? InvalidResult : // has to be before inf - XInfM ? {PSgn, FInput1M[62:0]} : - YInfM ? {PSgn, FInput2M[62:0]} : - ZInfM ? {ZSgn, FInput3M2[62:0]} : + XInfM ? {PSgn, X[62:0]} : + YInfM ? {PSgn, Y[62:0]} : + ZInfM ? {ZSgn, Addend[62:0]} : Overflow ? OverflowResult : KillProdM ? KillProdResult : // has to be after Underflow Underflow & ~ResultDenorm ? UnderflowResult : diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index e85d4743..016f004a 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -61,6 +61,7 @@ module fpu ( logic FInput3UsedD; // Is input 3 used logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic SelLoadInputE, SelLoadInputM; // regfile signals //*** KEP lint warning - changed `XLEN-1 to 63 logic [4:0] RdE, RdM, RdW; // ***Can take from ieu @@ -70,7 +71,7 @@ module fpu ( logic [63:0] FInput1E, FInput1M, FInput1tmpE; logic [63:0] FInput2E, FInput2M; logic [63:0] FInput3E, FInput3M; - logic [63:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions + logic [63:0] FLoadResultM, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions // div/sqrt signals logic DivDenormE, DivDenormM, DivDenormW; @@ -139,7 +140,7 @@ module fpu ( logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; // instantiation of W stage regfile signals - logic [`XLEN-1:0] SrcAW; + logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; // classify signals logic [63:0] ClassResultE, ClassResultM, ClassResultW; @@ -207,16 +208,18 @@ module fpu ( flopenrc #(1) DEReg15(clk, reset, PipeClearDE, 
PipeEnableDE, FWriteIntD, FWriteIntE); flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E); flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); + flopenrc #(1) DEReg18(clk, reset, PipeClearDE, PipeEnableDE, InstrD[15], SelLoadInputE); //EXECUTION STAGE - // input muxs for forwarding - mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, {SrcAM, {64-`XLEN{1'b0}}}, FForwardInput1E, FInput1tmpE); + // input muxs for forwarding + mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); + mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, FForwardInput1E, FInput1tmpE); mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E); - fma1 fma1 (.FOpCtrlE(FOpCtrlE[2:0]),.*); + fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*); // first and only instance of floating-point divider logic fpdivClk; @@ -337,6 +340,7 @@ module fpu ( flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM); flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); + flopenrc #(1) EMReg9(clk, reset, PipeClearEM, PipeEnableEM, SelLoadInputE, SelLoadInputM); //***************** // fpuclassify E/M pipe registers @@ -345,11 +349,13 @@ module fpu ( //BEGIN MEMORY STAGE - assign FWriteDataM = FInput1M[63:64-`XLEN]; + assign FWriteDataM = FmtM ? 
FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]}; + //adjacent address values are sent to the FPU, select the correct one + // -imm is 80000 most of the time vs the error one which is 00000 + mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); + mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); - mux2 #(64) FLoadStoreResultMux({HRDATA, {64-`AHBW{1'b0}}}, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); - - fma2 fma2(.FOpCtrlM(FOpCtrlM[2:0]), .*); + fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*); // second instance of two-stage floating-point add/cvt unit fpuaddcvt2 fpadd2 (.*); @@ -357,7 +363,9 @@ module fpu ( // second instance of two-stage floating-point comparator fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*); - + + mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); + //***************** // fma M/W pipe registers //***************** @@ -397,7 +405,7 @@ module fpu ( flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); - flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW); + flopenrc #(64) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, AlignedSrcAM, SrcAW); flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW); flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); @@ -447,7 +455,7 @@ module fpu ( // classify 3'b101 : FPUResult64W = ClassResultW; // output SrcAW - 3'b110 : FPUResult64W = {SrcAW, 
{64-`XLEN{1'b0}}}; + 3'b110 : FPUResult64W = SrcAW; // Load/Store/Move to FP-register 3'b111 : FPUResult64W = FLoadStoreResultW; default : FPUResult64W = {64{1'bx}}; @@ -460,7 +468,7 @@ module fpu ( // define offsets for LSB zero extension or truncation always_comb begin // zero extension - FPUResultW = FPUResult64W[63:64-`XLEN]; + FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; SetFflagsM = FPUFlagsW; end diff --git a/wally-pipelined/src/fpu/fpuclassify.sv b/wally-pipelined/src/fpu/fpuclassify.sv index ee03cb52..1000bdf4 100644 --- a/wally-pipelined/src/fpu/fpuclassify.sv +++ b/wally-pipelined/src/fpu/fpuclassify.sv @@ -43,8 +43,10 @@ module fpuclassify ( // bit 7 - +infinity // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResultE = {{`XLEN-10{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {64-`XLEN{1'b0}}}; + assign ClassResultE = FmtE ? 
{{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, + ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity} : + {{22{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, + ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {32{1'b0}}}; endmodule diff --git a/wally-pipelined/src/fpu/fpucmp2.sv b/wally-pipelined/src/fpu/fpucmp2.sv index e2820688..42a780ac 100755 --- a/wally-pipelined/src/fpu/fpucmp2.sv +++ b/wally-pipelined/src/fpu/fpucmp2.sv @@ -45,6 +45,7 @@ module fpucmp2 ( input logic ANaN, BNaN, input logic Azero, Bzero, input logic [3:0] FOpCtrlM, + input logic FmtM, output logic Invalid, // Invalid Operation output logic [1:0] FCC, // Condition Codes @@ -160,6 +161,7 @@ endmodule // magcompare64b module exception_cmp_2 ( input logic [63:0] A, input logic [63:0] B, + input logic FmtM, input logic LT_mag, input logic EQ_mag, input logic [1:0] Sel, @@ -230,11 +232,12 @@ module exception_cmp_2 ( case (FOpCtrlM[2:0]) 3'b111: FCmpResultM = LT ? A : B;//min 3'b101: FCmpResultM = GT ? A : B;//max - 3'b010: FCmpResultM = {63'b0, EQ};//equal - 3'b001: FCmpResultM = {63'b0, LT};//less than - 3'b011: FCmpResultM = {63'b0, LT | EQ};//less than or equal + 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal + 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than + 3'b011: FCmpResultM = FmtM ? 
{63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal default: FCmpResultM = 64'b0; endcase end + endmodule // exception_cmp diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index f87f369b..7fa1e695 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -90,17 +90,21 @@ string tests32f[] = '{ }; string tests64f[] = '{ - "rv64f/I-FADD-S-01", "2000", + // "rv64f/I-FLW-01", "2110", + "rv64f/I-FMV-W-X-01", "2000", + "rv64f/I-FMV-X-W-01", "2000", + "rv64f/I-FSW-01", "2000", "rv64f/I-FCLASS-S-01", "2000", - "rv64f/I-FCVT-S-L-01", "2000", - "rv64f/I-FCVT-S-LU-01", "2000", - "rv64f/I-FCVT-S-W-01", "2000", - "rv64f/I-FCVT-S-WU-01", "2000", - "rv64f/I-FCVT-L-S-01", "2000", - "rv64f/I-FCVT-LU-S-01", "2000", - "rv64f/I-FCVT-W-S-01", "2000", - "rv64f/I-FCVT-WU-S-01", "2000", - "rv64f/I-FDIV-S-01", "2000", + "rv64f/I-FADD-S-01", "2000", + // "rv64f/I-FCVT-S-L-01", "2000", + // "rv64f/I-FCVT-S-LU-01", "2000", + // "rv64f/I-FCVT-S-W-01", "2000", + // "rv64f/I-FCVT-S-WU-01", "2000", + // "rv64f/I-FCVT-L-S-01", "2000", + // "rv64f/I-FCVT-LU-S-01", "2000", + // "rv64f/I-FCVT-W-S-01", "2000", + // "rv64f/I-FCVT-WU-S-01", "2000", + // "rv64f/I-FDIV-S-01", "2000", "rv64f/I-FEQ-S-01", "2000", "rv64f/I-FLE-S-01", "2000", "rv64f/I-FLT-S-01", "2000", @@ -109,20 +113,19 @@ string tests32f[] = '{ "rv64f/I-FMIN-S-01", "2000", "rv64f/I-FMSUB-S-01", "2000", "rv64f/I-FMUL-S-01", "2000", - "rv64f/I-FMV-W-X-01", "2000", "rv64f/I-FNMADD-S-01", "2000", "rv64f/I-FNMSUB-S-01", "2000", "rv64f/I-FSGNJ-S-01", "2000", "rv64f/I-FSGNJN-S-01", "2000", "rv64f/I-FSGNJX-S-01", "2000", - "rv64f/I-FSQRT-S-01", "2000", - "rv64f/I-FSW-01", "2000", - "rv64f/I-FLW-01", "2000", + // "rv64f/I-FSQRT-S-01", "2000", "rv64f/I-FSUB-S-01", "2000" }; string tests64d[] = '{ // "rv64d/I-FDIV-D-01", "2000", + "rv64d/I-FSD-01", "2000", + "rv64d/I-FLD-01", "2420", "rv64d/I-FNMADD-D-01", "2000", 
"rv64d/I-FNMSUB-D-01", "2000", "rv64d/I-FMSUB-D-01", "2000", @@ -143,8 +146,6 @@ string tests32f[] = '{ // "rv64d/I-FCVT-S-D-01", "2000", // "rv64d/I-FCVT-W-D-01", "2000", // "rv64d/I-FCVT-WU-D-01", "2000", - "rv64d/I-FSD-01", "2000", - "rv64d/I-FLD-01", "2420", "rv64d/I-FMADD-D-01", "2000", "rv64d/I-FMUL-D-01", "2000", "rv64d/I-FMV-D-X-01", "2000", @@ -538,8 +539,8 @@ string tests32f[] = '{ if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; - // if (`F_SUPPORTED) tests = {tests64f, tests}; if (`D_SUPPORTED) tests = {tests64d, tests}; + if (`F_SUPPORTED) tests = {tests64f, tests}; end //tests = {tests64a, tests}; end else begin // RV32