diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv index e893fc81d..bf652a7fd 100644 --- a/wally-pipelined/src/fpu/fcvt.sv +++ b/wally-pipelined/src/fpu/fcvt.sv @@ -1,95 +1,122 @@ -// `include "wally-config.vh" +`include "wally-config.vh" module fcvt ( - input logic [63:0] X, - input logic [64-1:0] SrcAE, - input logic [3:0] FOpCtrlE, - input logic [2:0] FrmE, - input logic FmtE, - output logic [63:0] CvtResE, - output logic [4:0] CvtFlgE); + input logic [63:0] X, // floating point input + input logic [`XLEN-1:0] SrcAE, // integer input + input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below) + input logic [2:0] FrmE, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic FmtE, // precision 1 = double 0 = single + output logic [63:0] CvtResE, // convert final result + output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact} - logic [10:0] XExp; - logic [51:0] XFrac; - logic XSgn; - logic [10:0] ResExp,TmpExp; - logic [51:0] ResFrac; - logic ResSgn; - logic [10:0] NormCnt; - logic [11:0] Bias; // 1023 for double, 127 for single - logic [7:0] Bits, SubBits; - logic [64+51:0] ShiftedManTmp; - logic [64+51:0] ShiftVal; - logic [64+1:0] ShiftedMan; - logic [64:0] RoundedTmp; - logic [63:0] Rounded; - logic [12:0] ExpVal, ShiftCnt; - logic [64-1:0] PosInt; - - logic [64-1:0] CvtIntRes; - logic [63:0] CvtRes; - logic XFracZero, Of,Uf; - logic XExpMax; - logic XNaN, XDenorm, XInf, XZero; - logic Plus1,CalcPlus1, Guard, Round, LSB, Sticky; - logic SgnRes, In64; - logic Res64; - logic RoundMSB; - logic RoundSgn; - logic XExpZero; + logic XSgn; // FP input's sign + logic [10:0] XExp; // FP input's exponent + logic [51:0] XFrac; // FP input's fraction + logic ResSgn; // FP result's sign + logic [10:0] ResExp,TmpExp; // FP result's exponent + logic [51:0] ResFrac; 
// FP result's fraction + logic [5:0] LZResP; // lz output + // logic LZResV; + logic [11:0] Bias; // 1023 for double, 127 for single + logic [7:0] Bits; // how many bits are in the integer result + logic [7:0] SubBits; // subtract these bits from the exponent (FP result) + logic [`XLEN+51:0] ShiftedManTmp; // Shifted mantissa + logic [`XLEN+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|) + logic [`XLEN+1:0] ShiftedMan; // shifted mantissa truncated + logic [64:0] RoundedTmp; // full size rounded result - in case of overflow + logic [63:0] Rounded; // rounded result + logic [12:0] ExpVal; // unbiased X exponent + logic [12:0] ShiftCnt; // how much is the mantissa shifted + logic [`XLEN-1:0] IntIn; // trimmed integer input + logic [`XLEN-1:0] PosInt; // absolute value of the integer input + logic [63:0] CvtIntRes; // integer result from the fp -> int instructions + logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions + logic XFracZero; // is the fraction of X zero? 
+ logic Of, Uf; // did the integer result underflow or overflow + logic XExpZero; // is X's exponent zero + logic XExpMax; // is the exponent all ones + logic XNaN, XDenorm, XInf, XZero; // is X a special value + logic Guard, Round, LSB, Sticky; // bits used to determine rounding + logic Plus1,CalcPlus1; // do you add one for rounding + logic SgnRes; // sign of the floating point result + logic Res64, In64; // is the result or input 64 bits + logic RoundMSB; // most significant bit of the fraction + logic RoundSgn; // sign of the rounded result - // fcvt.w.s = 0010 - - // fcvt.wu.s = 0110 - - // fcvt.s.w = 0001 - // fcvt.s.wu = 0101 - // fcvt.l.s = 1010 - - // fcvt.lu.s = 1110 - - // fcvt.s.l = 1001 - // fcvt.s.lu = 1101 - // fcvt.w.d = 0010 - - // fcvt.wu.d = 0110 - - // fcvt.d.w = 0001 - // fcvt.d.wu = 0101 - // fcvt.l.d = 1010 - - // fcvt.lu.d = 1110 - - // fcvt.d.l = 1001 -- - // fcvt.d.lu = 1101 -- - // {long, unsigned, to int, from int} Fmt controls the output for fp -> fp + // FOpCtrlE: + // fcvt.w.s = 0010 + // fcvt.wu.s = 0110 + // fcvt.s.w = 0001 + // fcvt.s.wu = 0101 + // fcvt.l.s = 1010 + // fcvt.lu.s = 1110 + // fcvt.s.l = 1001 + // fcvt.s.lu = 1101 + // fcvt.w.d = 0010 + // fcvt.wu.d = 0110 + // fcvt.d.w = 0001 + // fcvt.d.wu = 0101 + // fcvt.l.d = 1010 + // fcvt.lu.d = 1110 + // fcvt.d.l = 1001 + // fcvt.d.lu = 1101 + // {long, unsigned, to int, from int} + + // split the input into it's various parts assign XSgn = X[63]; assign XExp = FmtE ? X[62:52] : {3'b0, X[62:55]}; assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; + + // determine if the exponent and fraction are all zero or ones assign XExpZero = ~|XExp; - assign XFracZero = ~|XFrac; assign XExpMax = FmtE ? 
&XExp[10:0] : &XExp[7:0]; + + // determine if X is a special value assign XNaN = XExpMax & ~XFracZero; assign XDenorm = XExpZero & ~XFracZero; assign XInf = XExpMax & XFracZero; assign XZero = XExpZero & XFracZero; - + // calculate signals based off the input and output's size assign Bias = FmtE ? 12'h3ff : 12'h7f; assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101))); assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE))); assign SubBits = In64 ? 8'd64 : 8'd32; assign Bits = Res64 ? 8'd64 : 8'd32; + + // calculate the unbiased exponent assign ExpVal = XExp - Bias + XDenorm; //////////////////////////////////////////////////////// - logic [64-1:0] IntIn; + // position the input in the most significant bits assign IntIn = FOpCtrlE[3] ? SrcAE : {SrcAE[31:0], 32'b0}; + // make the integer positive assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn; + // determine the integer's sign assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0; + // This did not work \/ + // generate + // if(`XLEN == 64) + // lz64 lz(LZResP, LZResV, PosInt); + // else if(`XLEN == 32) begin + // assign LZResP[5] = 1'b0; + // lz32 lz(LZResP[4:0], LZResV, PosInt); + // end + // endgenerate + // Leading one detector logic [8:0] i; always_comb begin i = 0; while (~PosInt[64-1-i] && i <= 64) i = i+1; // search for leading one - NormCnt = i+1; // compute shift count + LZResP = i+1; // compute shift count end - assign TmpExp = i==64 ? 0 : Bias + SubBits - NormCnt; + + // if no leading one was found set to zero otherwise calculate the exponent + assign TmpExp = i==64 ? 
0 : Bias + SubBits - LZResP; @@ -97,15 +124,21 @@ module fcvt ( //////////////////////////////////////////// + // select the shift value and amount based on operation (to fp or int) + assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP; + assign ShiftVal = FOpCtrlE[1] ? {{`XLEN-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0}; - assign ShiftCnt = FOpCtrlE[1] ? ExpVal : NormCnt; - assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0}; - //if shift = -1 then shift one bit right for round to nearest (shift over 2 never rounds) - // if the shift is negitive add bit for sticky bit + // if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds) + // if the shift is negative add a bit for sticky bit calculation // otherwise shift left - assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {115'b0, ~XZero} : ShiftVal << ShiftCnt; + assign ShiftedManTmp = &ShiftCnt ? {{`XLEN-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{`XLEN+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt; + // truncate the shifted mantissa assign ShiftedMan = ShiftedManTmp[64+51:50]; + + // calculate sticky bit + // - take into account the possible right shift from before + // - the sticky bit calculation covers three different sizes depending on the operation assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); @@ -126,33 +159,45 @@ module fcvt ( endcase end + // don't round if the result is exact assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]); + // round the shifted mantissa assign RoundedTmp = ShiftedMan[64+1:2] + Plus1; assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ; + // fit the rounded result into the appropriate size and take the 2's complement if needed assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? 
-RoundedTmp[63:0] : RoundedTmp[63:0] : XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]}; + + // extract the MSB and Sign for later use (will be used to determine underflow and overflow) assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32]; assign RoundSgn = Res64 ? Rounded[63] : Rounded[31]; - - // Choose result - // double to unsigned long - // >2^64-1 or +inf or NaN - all 1's - // <0 or -inf - zero - // otherwise rounded result - //assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN; + // check if the result overflows assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN; + + // check if the result underflows (this calculation changes if the result is signed or unsigned) assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded; + + // calculate the result's sign assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1]; + + // select the integer result assign CvtIntRes = Of ? FOpCtrlE[2] ? SgnRes ? {32'b0, {32{1'b1}}}: {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} : Rounded[64-1:0]; - - assign CvtRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0}; - assign CvtResE = FOpCtrlE[0] ? CvtRes : CvtIntRes; + + // select the floating point result + assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0}; + + // select the result + assign CvtResE = FOpCtrlE[0] ? 
CvtFPRes : CvtIntRes; + + // calculate the flags + // - to int only sets the invalid flag + // - from int only sets the inexact flag assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]}; diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index ab351bd78..0ff199129 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -40,8 +40,7 @@ module fpu ( output logic [`XLEN-1:0] FIntResM, output logic FDivBusyE, // Is the divison/sqrt unit busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - output logic [4:0] SetFflagsM, // FPU flags - output logic [`XLEN-1:0] FPUResultW); // FPU result + output logic [4:0] SetFflagsM); // FPU flags // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS generate @@ -79,6 +78,9 @@ module fpu ( logic [63:0] FMAResM, FMAResW; logic [4:0] FMAFlgM, FMAFlgW; + + logic [63:0] ReadResW; + // add/cvt signals logic [63:0] FAddResM, FAddResW; logic [4:0] FAddFlgM, FAddFlgW; @@ -102,7 +104,7 @@ module fpu ( logic [63:0] ClassResE, ClassResM; // 64-bit FPU result - logic [63:0] FPUResult64W; + logic [63:0] FPUResultW; logic [4:0] FPUFlagsW; @@ -124,7 +126,7 @@ module fpu ( // regfile instantiation fregfile fregfile (clk, reset, FWriteEnW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResult64W, + FPUResultW, FRD1D, FRD2D, FRD3D); @@ -168,9 +170,9 @@ module fpu ( .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); - mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); - mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE); + mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE); // first of two-stage instance of floating-point fused multiply-add unit @@ -218,8 +220,7 @@ module fpu ( fcvt fcvt (.X(SrcXE), 
.SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); // output for store instructions - assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - //***swap to mux + mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE); @@ -265,8 +266,7 @@ module fpu ( mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); - //***change to mux - assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux2 #(`XLEN) SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned); mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); @@ -318,28 +318,10 @@ module fpu ( //######################################### + mux2 #(64) ReadResMux({ReadDataW[31:0], 32'b0}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, ReadResW); + mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); + - always_comb begin - case (FResultSelW) - 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FMAResW; - 3'b010 : FPUResult64W = FAddResW; - 3'b011 : FPUResult64W = FDivResultW; - 3'b100 : FPUResult64W = FResW; - default : FPUResult64W = 64'bxxxxx; - endcase - end - - - // interface between XLEN size datapath and double-precision sized - // floating-point results - // - // define offsets for LSB zero extension or truncation - always_comb begin - // zero extension -//***turn into mux - FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; - end end else begin // no F_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; @@ -350,7 +332,6 @@ module fpu ( assign FDivBusyE = 0; assign IllegalFPUInstrD = 1; assign SetFflagsM = 0; - assign FPUResultW = 0; end endgenerate