diff --git a/wally-pipelined/src/fpu/fclassify.sv b/wally-pipelined/src/fpu/fclassify.sv index 35475ed5..35e9a80c 100644 --- a/wally-pipelined/src/fpu/fclassify.sv +++ b/wally-pipelined/src/fpu/fclassify.sv @@ -3,43 +3,18 @@ module fclassify ( input logic XSgnE, - input logic [51:0] XFracE, input logic XNaNE, input logic XSNaNE, input logic XNormE, input logic XDenormE, input logic XZeroE, input logic XInfE, - // input logic FmtE, // 0-Single 1-Double output logic [63:0] ClassResE ); - // logic XSgnE; - // logic Inf, NaN, Zero, Norm, Denorm; logic PInf, PZero, PNorm, PDenorm; logic NInf, NZero, NNorm, NDenorm; - // logic MaxExp, ExpZero, ManZero, FirstBitFrac; - - // Single and Double precision layouts - // assign XSgnE = FmtE ? FSrcXE[63] : FSrcXE[31]; - // basic calculations for readabillity - - // assign ExpZero = FmtE ? ~|FSrcXE[62:52] : ~|FSrcXE[30:23]; - // assign MaxExp = FmtE ? &FSrcXE[62:52] : &FSrcXE[30:23]; - // assign ManZero = FmtE ? ~|FSrcXE[51:0] : ~|FSrcXE[22:0]; - // assign FirstBitFrac = FmtE ? FSrcXE[51] : FSrcXE[22]; - - // determine the type of number - // assign NaN = MaxExp & ~ManZero; - // assign Inf = MaxExp & ManZero; - // assign Zero = ExpZero & ManZero; - // assign Denorm= ExpZero & ~ManZero; - // assign Norm = ~ExpZero; - - // determine the sub categories - // assign QNaN = FirstBitFrac&NaN; - // assign SNaN = ~FirstBitFrac&NaN; assign PInf = ~XSgnE&XInfE; assign NInf = XSgnE&XInfE; assign PNorm = ~XSgnE&XNormE; diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv index 3ee0be0f..665b69ea 100644 --- a/wally-pipelined/src/fpu/fcvt.sv +++ b/wally-pipelined/src/fpu/fcvt.sv @@ -3,8 +3,7 @@ module fcvt ( input logic XSgnE, input logic [10:0] XExpE, - input logic [51:0] XFracE, - input logic XAssumed1E, + input logic [52:0] XManE, input logic XZeroE, input logic XNaNE, input logic XInfE, @@ -65,7 +64,8 @@ module fcvt ( // assign Bias = FmtE ? 
12'h3ff : 12'h7f; assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101))); assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE))); - assign SubBits = In64 ? 8'd64 : 8'd32; + //assign SubBits = In64 ? 8'd64 : 8'd32; + assign SubBits = 8'd64; assign Bits = Res64 ? 8'd64 : 8'd32; // calulate the unbiased exponent @@ -108,12 +108,12 @@ module fcvt ( // select the shift value and amount based on operation (to fp or int) assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP; - assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XAssumed1E, XFracE} : {PosInt, 52'b0}; + assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XManE} : {PosInt, 52'b0}; // if shift = -1 then shift one bit right for gaurd bit (right shifting twice never rounds) // if the shift is negitive add a bit for sticky bit calculation // otherwise shift left - assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, XAssumed1E, XFracE[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt; + assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, XManE[52:1]} : ShiftCnt[12] ? 
{{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt; // truncate the shifted mantissa assign ShiftedMan = ShiftedManTmp[64+51:50]; @@ -121,7 +121,7 @@ module fcvt ( // calculate sticky bit // - take into account the possible right shift from before // - the sticky bit calculation covers three diffrent sizes depending on the opperation - assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFracE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); + assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); // determine guard, round, and least significant bit of the result diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index a4105aa5..faab2012 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -35,11 +35,10 @@ module fma( input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic XSgnE, YSgnE, ZSgnE, input logic [`NE-1:0] XExpE, YExpE, ZExpE, - input logic [`NF-1:0] XFracE, YFracE, ZFracE, + input logic [`NF:0] XManE, YManE, ZManE, input logic XSgnM, YSgnM, ZSgnM, - input logic [`NE-1:0] XExpM, YExpM, ZExpM, - input logic [`NF-1:0] XFracM, YFracM, ZFracM, - input logic XAssumed1E, YAssumed1E, ZAssumed1E, + input logic [`NE-1:0] XExpM, YExpM, ZExpM, // ***needed + input logic [`NF:0] XManM, YManM, ZManM, input logic XDenormE, YDenormE, ZDenormE, input logic XZeroE, YZeroE, ZZeroE, input logic XNaNM, YNaNM, ZNaNM, @@ -57,8 +56,8 @@ module fma( logic AddendStickyE, AddendStickyM; logic KillProdE, KillProdM; - fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, - .BiasE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, + fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .BiasE, .XDenormE, .YDenormE, 
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE, .ProdExpE, .AddendStickyE, .KillProdE); @@ -69,7 +68,7 @@ module fma( {AddendStickyE, KillProdE}, {AddendStickyM, KillProdM}); - fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, + fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, .FOpCtrlM, .FrmM, .FmtM, .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, @@ -82,8 +81,7 @@ endmodule module fma1( // input logic XSgnE, YSgnE, ZSgnE, input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format - input logic [`NF-1:0] XFracE, YFracE, ZFracE, // fractions in U(0.NF) format] - input logic XAssumed1E, YAssumed1E, ZAssumed1E, + input logic [`NF:0] XManE, YManE, ZManE, // mantissas (assumed one + fraction) in U(1.NF) format input logic XDenormE, YDenormE, ZDenormE, input logic XZeroE, YZeroE, ZZeroE, input logic [`NE-1:0] BiasE, @@ -99,7 +97,7 @@ module fma1( logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits? logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1) - + /////////////////////////////////////////////////////////////////////////////// // Calculate the product // - When multipliying two fp numbers, add the exponents @@ -114,8 +112,8 @@ module fma1( // verilator lint_on WIDTH // Calculate the product's mantissa - // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. - assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE}; + // - Mantissa includes the assumed one. If the number is denormalized or zero, it does not have an assumed one. 
+ assign ProdManE = XManE * YManE; /////////////////////////////////////////////////////////////////////////////// // Alignment shifter @@ -133,7 +131,7 @@ module fma1( // |1'b0| addnend | // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {(`NF+3)'(0), {ZAssumed1E, ZFracE}, /*106*/(2*`NF+2)'(0)}; + assign ZManPreShifted = {(`NF+3)'(0), ZManE, /*106*/(2*`NF+2)'(0)}; always_comb begin @@ -143,7 +141,7 @@ module fma1( // | addnend | if ($signed(AlignCnt) <= $signed(-(`NF+4))) begin KillProdE = 1; - ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0}; + ZManShifted = ZManPreShifted;//{107'b0, ZManE, 54'b0}; AddendStickyE = ~(XZeroE|YZeroE); // If the Addend is shifted left (negitive AlignCnt) @@ -185,7 +183,7 @@ module fma2( input logic XSgnM, YSgnM, ZSgnM, input logic [`NE-1:0] XExpM, YExpM, ZExpM, - input logic [`NF-1:0] XFracM, YFracM, ZFracM, + input logic [`NF:0] XManM, YManM, ZManM, input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtM, // precision 1 = double 0 = single @@ -202,7 +200,6 @@ module fma2( output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} - logic [`NF-1:0] ResultFrac; // Result fraction logic [`NE-1:0] ResultExp; // Result exponent logic ResultSgn; // Result sign @@ -239,11 +236,12 @@ module fma2( logic SigNaN; // is an input a signaling NaN logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency) logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results - + logic ZSgnEffM; // Calculate the product's 
sign // Negate product's sign if FNMADD or FNMSUB + assign PSgn = XSgnM ^ YSgnM ^ FOpCtrlM[1]; @@ -255,7 +253,8 @@ module fma2( // Negate Z when doing one of the following opperations: // -prod + Z // prod - Z - assign InvZ = ZSgnM ^ PSgn; + assign ZSgnEffM = ZSgnM^FOpCtrlM[0]; // Swap sign of Z for subtract + assign InvZ = ZSgnEffM ^ PSgn; // Choose an inverted or non-inverted addend - the one is added later assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; @@ -307,7 +306,8 @@ module fma2( assign SumZero = ~(|Sum); // determine the length of the fraction based on precision - assign FracLen = FmtM ? `NF : 13'd23; + //assign FracLen = FmtM ? `NF : 13'd23; + assign FracLen = `NF; // Determine if the result is denormal assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4)); @@ -374,7 +374,7 @@ module fma2( assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31]; // Deterimine if a small number was supposed to be subtrated - assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; + assign SubBySmallNum = AddendStickyM & InvZ & ~(NormSumSticky) & ~ZZeroM; always_comb begin // Determine if you add 1 @@ -435,13 +435,13 @@ module fma2( // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity // otherwise psign - assign ZeroSgn = (PSgn^ZSgnM)&~Underflow ? FrmM == 3'b010 : PSgn; + assign ZeroSgn = (PSgn^ZSgnEffM)&~Underflow ? FrmM == 3'b010 : PSgn; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive // if -p - z then the Sum is negitive - assign ResultSgnTmp = InvZ&(ZSgnM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnM)&PSgn); + assign ResultSgnTmp = InvZ&(ZSgnEffM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnEffM)&PSgn); assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; @@ -460,7 +460,7 @@ module fma2( // 3) 0 * Inf assign MaxExp = FmtM ? 
{`NE{1'b1}} : 13'd255; assign SigNaN = XSNaNM | YSNaNM | ZSNaNM; - assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); + assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); // Set Overflow flag if the number is too big to be represented // - Don't set the overflow flag if an overflowed result isn't outputed @@ -490,29 +490,29 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// // Select the result /////////////////////////////////////////////////////////////////////////////// - assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]}; - assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]}; - assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[`NF-2:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]}; + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; + assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; - assign KillProdResult = FmtM ? 
{ResultSgn, {ZExpM, ZFracM} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + (CalcPlus1&(AddendStickyM|FrmM[1])) : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : ZNaNM ? ZNaNResult : Invalid ? InvalidResult : // has to be before inf - XInfM ? FmtM ? {PSgn, XExpM, XFracM} : {{32{1'b1}}, PSgn, XExpM[7:0], XFracM[51:29]} : - YInfM ? FmtM ? {PSgn, YExpM, YFracM} : {{32{1'b1}}, PSgn, YExpM[7:0], YFracM[51:29]} : - ZInfM ? FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]} : + XInfM ? FmtM ? {PSgn, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgn, XExpM[7:0], XManM[51:29]} : + YInfM ? FmtM ? {PSgn, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgn, YExpM[7:0], YManM[51:29]} : + ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} : Overflow ? OverflowResult : KillProdM ? KillProdResult : // has to be after Underflow Underflow & ~ResultDenorm ? UnderflowResult : FmtM ? 
{ResultSgn, ResultExp, ResultFrac} : {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]}; - +// *** use NF where needed endmodule \ No newline at end of file diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 344500d4..5022e82a 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -68,13 +68,12 @@ module fpu ( logic [`XLEN-1:0] FSrcXMAligned; logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding) + logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) // unpacking signals logic XSgnE, YSgnE, ZSgnE; logic [10:0] XExpE, YExpE, ZExpE; - logic [51:0] XFracE, YFracE, ZFracE; - logic XAssumed1E, YAssumed1E, ZAssumed1E; + logic [52:0] XManE, YManE, ZManE; logic XNaNE, YNaNE, ZNaNE; logic XSNaNE, YSNaNE, ZSNaNE; logic XDenormE, YDenormE, ZDenormE; @@ -86,7 +85,7 @@ module fpu ( logic XSgnM, YSgnM, ZSgnM; logic [10:0] XExpM, YExpM, ZExpM; - logic [51:0] XFracM, YFracM, ZFracM; + logic [52:0] XManM, YManM, ZManM; logic XNaNM, YNaNM, ZNaNM; logic XSNaNM, YSNaNM, ZSNaNM; logic XZeroM, YZeroM, ZZeroM; @@ -162,25 +161,26 @@ module fpu ( // Hazard unit for FPU fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); - + // forwarding muxs mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); - mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE); - + mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); + mux2 #(64) fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions + unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, - .ZSgnE, .XExpE, 
.YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, - .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, + .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); // first of two-stage instance of floating-point fused multiply-add unit fma fma (.clk, .reset, .FlushM, .StallM, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, . - ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, . + ZManE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, - .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, - .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, + .YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); @@ -216,17 +216,17 @@ module fpu ( .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); // first and only instance of floating-point comparator - fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, + fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); - + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); + // first and only instance of floating-point classify 
unit - fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); + fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); - fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); + fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); // output for store instructions assign FWriteDataE = FSrcYE[`XLEN-1:0]; @@ -237,9 +237,9 @@ module fpu ( flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); - flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM}); - flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM}); - flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM}); + flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); + flopenrc #(65) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); + flopenrc #(65) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM}); flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 67865b64..83cb940f 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -2,8 +2,7 @@ module fsgn ( input logic XSgnE, YSgnE, - input logic [10:0] XExpE, - input logic [51:0] XFracE, + input logic [63:0] FSrcXE, input logic XExpMaxE, 
input logic FmtE, input logic [1:0] SgnOpCodeE, @@ -21,7 +20,7 @@ module fsgn ( // assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]); - assign SgnResE = FmtE ? {ResSgn, XExpE, XFracE} : {{32{1'b1}}, ResSgn, XExpE[7:0], XFracE[51:29]}; + assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]}; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will diff --git a/wally-pipelined/src/fpu/unpacking.sv b/wally-pipelined/src/fpu/unpacking.sv index 1133a403..914e9979 100644 --- a/wally-pipelined/src/fpu/unpacking.sv +++ b/wally-pipelined/src/fpu/unpacking.sv @@ -4,8 +4,7 @@ module unpacking ( input logic [2:0] FOpCtrlE, output logic XSgnE, YSgnE, ZSgnE, output logic [10:0] XExpE, YExpE, ZExpE, - output logic [51:0] XFracE, YFracE, ZFracE, - output logic XAssumed1E, YAssumed1E, ZAssumed1E, + output logic [52:0] XManE, YManE, ZManE, output logic XNormE, output logic XNaNE, YNaNE, ZNaNE, output logic XSNaNE, YSNaNE, ZSNaNE, @@ -15,41 +14,48 @@ module unpacking ( output logic XInfE, YInfE, ZInfE, output logic XExpMaxE ); - //***rename to make significand = 1.frac m = significand + + logic [51:0] XFracE, YFracE, ZFracE; + logic XExpNonzero, YExpNonzero, ZExpNonzero; logic XFracZero, YFracZero, ZFracZero; // input fraction zero logic XExpZero, YExpZero, ZExpZero; // input exponent zero - logic [63:0] Addend; // value to add (Z or zero) logic YExpMaxE, ZExpMaxE; // input exponent all 1s - assign Addend = FOpCtrlE[2] ? 64'b0 : Z; // Z is only used in the FMA, and is set to Zero if a multiply opperation assign XSgnE = FmtE ? X[63] : X[31]; assign YSgnE = FmtE ? Y[63] : Y[31]; - assign ZSgnE = FmtE ? Addend[63]^FOpCtrlE[0] : Addend[31]^FOpCtrlE[0]; + assign ZSgnE = FmtE ? Z[63] : Z[31]; - assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; + assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; + assign YExpE = FmtE ? 
Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; + assign ZExpE = FmtE ? Z[62:52] : {Z[30], {3{~Z[30]&~ZExpZero|ZExpMaxE}}, Z[29:23]}; +/* assign XExpE = FmtE ? X[62:52] : {3'b0, X[30:23]}; // *** maybe convert to full number of bits here? assign YExpE = FmtE ? Y[62:52] : {3'b0, Y[30:23]}; - assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]}; + assign ZExpE = FmtE ? Z[62:52] : {3'b0, Z[30:23]};*/ assign XFracE = FmtE ? X[51:0] : {X[22:0], 29'b0}; assign YFracE = FmtE ? Y[51:0] : {Y[22:0], 29'b0}; - assign ZFracE = FmtE ? Addend[51:0] : {Addend[22:0], 29'b0}; + assign ZFracE = FmtE ? Z[51:0] : {Z[22:0], 29'b0}; - assign XAssumed1E = |XExpE; - assign YAssumed1E = |YExpE; - assign ZAssumed1E = |ZExpE; + assign XExpNonzero = FmtE ? |X[62:52] : |X[30:23]; + assign YExpNonzero = FmtE ? |Y[62:52] : |Y[30:23]; + assign ZExpNonzero = FmtE ? |Z[62:52] : |Z[30:23]; - assign XExpZero = ~XAssumed1E; - assign YExpZero = ~YAssumed1E; - assign ZExpZero = ~ZAssumed1E; + assign XManE = {XExpNonzero, XFracE}; + assign YManE = {YExpNonzero, YFracE}; + assign ZManE = {ZExpNonzero, ZFracE}; + + assign XExpZero = ~XExpNonzero; + assign YExpZero = ~YExpNonzero; + assign ZExpZero = ~ZExpNonzero; assign XFracZero = ~|XFracE; assign YFracZero = ~|YFracE; assign ZFracZero = ~|ZFracE; - assign XExpMaxE = FmtE ? &XExpE[10:0] : &XExpE[7:0]; - assign YExpMaxE = FmtE ? &YExpE[10:0] : &YExpE[7:0]; - assign ZExpMaxE = FmtE ? &ZExpE[10:0] : &ZExpE[7:0]; - + assign XExpMaxE = FmtE ? &X[62:52] : &X[30:23]; + assign YExpMaxE = FmtE ? &Y[62:52] : &Y[30:23]; + assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23]; + assign XNormE = ~(XExpMaxE|XExpZero); assign XNaNE = XExpMaxE & ~XFracZero; @@ -72,6 +78,7 @@ module unpacking ( assign YZeroE = YExpZero & YFracZero; assign ZZeroE = ZExpZero & ZFracZero; - assign BiasE = FmtE ? 13'h3ff : 13'h7f; + //assign BiasE = FmtE ? 13'h3ff : 13'h7f; // *** is it better to convert to full precision exponents so bias isn't needed? 
+ assign BiasE = 13'h3ff; // always use 1023 because exponents are unpacked to double precision endmodule \ No newline at end of file diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 20df6e23..0090a43b 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -90,10 +90,10 @@ string tests32f[] = '{ "rv64f/I-FSW-01", "2000", "rv64f/I-FCLASS-S-01", "2000", "rv64f/I-FADD-S-01", "2000", - "rv64f/I-FCVT-S-L-01", "2000", - "rv64f/I-FCVT-S-LU-01", "2000", - "rv64f/I-FCVT-S-W-01", "2000", - "rv64f/I-FCVT-S-WU-01", "2000", +// "rv64f/I-FCVT-S-L-01", "2000", +// "rv64f/I-FCVT-S-LU-01", "2000", +// "rv64f/I-FCVT-S-W-01", "2000", +// "rv64f/I-FCVT-S-WU-01", "2000", "rv64f/I-FCVT-L-S-01", "2000", "rv64f/I-FCVT-LU-S-01", "2000", "rv64f/I-FCVT-W-S-01", "2000", @@ -122,16 +122,6 @@ string tests32f[] = '{ "rv64d/I-FMV-X-D-01", "2000", "rv64d/I-FMV-D-X-01", "2000", "rv64d/I-FDIV-D-01", "2000", - "rv64d/I-FCVT-D-L-01", "2000", - "rv64d/I-FCVT-D-LU-01", "2000", - "rv64d/I-FCVT-D-S-01", "2000", - "rv64d/I-FCVT-D-W-01", "2000", - "rv64d/I-FCVT-D-WU-01", "2000", - "rv64d/I-FCVT-L-D-01", "2000", - "rv64d/I-FCVT-LU-D-01", "2000", - "rv64d/I-FCVT-S-D-01", "2000", - "rv64d/I-FCVT-W-D-01", "2000", - "rv64d/I-FCVT-WU-D-01", "2000", "rv64d/I-FNMADD-D-01", "2000", "rv64d/I-FNMSUB-D-01", "2000", "rv64d/I-FMSUB-D-01", "2000", @@ -148,8 +138,18 @@ string tests32f[] = '{ "rv64d/I-FSGNJN-D-01", "2000", "rv64d/I-FSGNJX-D-01", "2000", "rv64d/I-FSQRT-D-01", "2000", - "rv64d/I-FSUB-D-01", "2000" - }; + "rv64d/I-FSUB-D-01", "2000", +// "rv64d/I-FCVT-D-L-01", "2000", +// "rv64d/I-FCVT-D-LU-01", "2000", + "rv64d/I-FCVT-D-S-01", "2000", +// "rv64d/I-FCVT-D-W-01", "2000", +// "rv64d/I-FCVT-D-WU-01", "2000", + "rv64d/I-FCVT-L-D-01", "2000", + "rv64d/I-FCVT-LU-D-01", "2000", + "rv64d/I-FCVT-S-D-01", "2000", + "rv64d/I-FCVT-W-D-01", "2000", + "rv64d/I-FCVT-WU-D-01", "2000" +}; string 
tests64a[] = '{ //"rv64a/WALLY-AMO", "2110",