fcvt.sv cleanup

This commit is contained in:
Katherine Parry 2021-07-11 21:30:01 -04:00
parent 0cc07fda1b
commit a4bd128978
2 changed files with 132 additions and 106 deletions

View File

@ -1,95 +1,122 @@
// `include "wally-config.vh"
`include "wally-config.vh"
module fcvt (
input logic [63:0] X,
input logic [64-1:0] SrcAE,
input logic [3:0] FOpCtrlE,
input logic [2:0] FrmE,
input logic FmtE,
output logic [63:0] CvtResE,
output logic [4:0] CvtFlgE);
input logic [63:0] X, // floating point input
input logic [`XLEN-1:0] SrcAE, // integer input
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
input logic [2:0] FrmE, // rounding mode: 000 = round to nearest, ties to even; 001 = round towards zero; 010 = round down; 011 = round up; 100 = round to nearest, ties to max magnitude
input logic FmtE, // precision 1 = double 0 = single
output logic [63:0] CvtResE, // convert final result
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
logic [10:0] XExp;
logic [51:0] XFrac;
logic XSgn;
logic [10:0] ResExp,TmpExp;
logic [51:0] ResFrac;
logic ResSgn;
logic [10:0] NormCnt;
logic XSgn; // FP input's sign
logic [10:0] XExp; // FP input's exponent
logic [51:0] XFrac; // FP input's fraction
logic ResSgn; // FP result's sign
logic [10:0] ResExp,TmpExp; // FP result's exponent
logic [51:0] ResFrac; // FP result's fraction
logic [5:0] LZResP; // lz output
// logic LZResV;
logic [11:0] Bias; // 1023 for double, 127 for single
logic [7:0] Bits, SubBits;
logic [64+51:0] ShiftedManTmp;
logic [64+51:0] ShiftVal;
logic [64+1:0] ShiftedMan;
logic [64:0] RoundedTmp;
logic [63:0] Rounded;
logic [12:0] ExpVal, ShiftCnt;
logic [64-1:0] PosInt;
logic [7:0] Bits; // how many bits are in the integer result
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
logic [`XLEN+51:0] ShiftedManTmp; // Shifted mantissa
logic [`XLEN+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
logic [`XLEN+1:0] ShiftedMan; // shifted mantissa truncated
logic [64:0] RoundedTmp; // full size rounded result - in case of overflow
logic [63:0] Rounded; // rounded result
logic [12:0] ExpVal; // unbiased X exponent
logic [12:0] ShiftCnt; // how much is the mantissa shifted
logic [`XLEN-1:0] IntIn; // trimmed integer input
logic [`XLEN-1:0] PosInt; // absolute value of the integer input
logic [63:0] CvtIntRes; // integer result from the fp -> int instructions
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
logic XFracZero; // is the fraction of X zero?
logic Of, Uf; // did the integer result underflow or overflow
logic XExpZero; // is X's exponent zero
logic XExpMax; // is the exponent all ones
logic XNaN, XDenorm, XInf, XZero; // is X a special value
logic Guard, Round, LSB, Sticky; // bits used to determine rounding
logic Plus1,CalcPlus1; // do you add one for rounding
logic SgnRes; // sign of the floating point result
logic Res64, In64; // is the result or input 64 bits
logic RoundMSB; // most significant bit of the fraction
logic RoundSgn; // sign of the rounded result
logic [64-1:0] CvtIntRes;
logic [63:0] CvtRes;
logic XFracZero, Of,Uf;
logic XExpMax;
logic XNaN, XDenorm, XInf, XZero;
logic Plus1,CalcPlus1, Guard, Round, LSB, Sticky;
logic SgnRes, In64;
logic Res64;
logic RoundMSB;
logic RoundSgn;
logic XExpZero;
// fcvt.w.s = 0010 -
// fcvt.wu.s = 0110 -
// FOpCtrlE:
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.l.s = 1010 -
// fcvt.lu.s = 1110 -
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010 -
// fcvt.wu.d = 0110 -
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.l.d = 1010 -
// fcvt.lu.d = 1110 -
// fcvt.d.l = 1001 --
// fcvt.d.lu = 1101 --
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int}
// split the input into its various parts
assign XSgn = X[63];
assign XExp = FmtE ? X[62:52] : {3'b0, X[62:55]};
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
assign XExpZero = ~|XExp;
// determine if the exponent and fraction are all zero or ones
assign XExpZero = ~|XExp;
assign XFracZero = ~|XFrac;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
// determine if X is a special value
assign XNaN = XExpMax & ~XFracZero;
assign XDenorm = XExpZero & ~XFracZero;
assign XInf = XExpMax & XFracZero;
assign XZero = XExpZero & XFracZero;
// calculate signals based off the input and output's size
assign Bias = FmtE ? 12'h3ff : 12'h7f;
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
assign SubBits = In64 ? 8'd64 : 8'd32;
assign Bits = Res64 ? 8'd64 : 8'd32;
// calculate the unbiased exponent
assign ExpVal = XExp - Bias + XDenorm;
////////////////////////////////////////////////////////
logic [64-1:0] IntIn;
// position the input in the most significant bits
assign IntIn = FOpCtrlE[3] ? SrcAE : {SrcAE[31:0], 32'b0};
// make the integer positive
assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
// determine the integer's sign
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
// This did not work \/
// generate
// if(`XLEN == 64)
// lz64 lz(LZResP, LZResV, PosInt);
// else if(`XLEN == 32) begin
// assign LZResP[5] = 1'b0;
// lz32 lz(LZResP[4:0], LZResV, PosInt);
// end
// endgenerate
// Leading one detector
logic [8:0] i;
always_comb begin
i = 0;
while (~PosInt[64-1-i] && i <= 64) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count
LZResP = i+1; // compute shift count
end
assign TmpExp = i==64 ? 0 : Bias + SubBits - NormCnt;
// if no leading one was found, set the exponent to zero; otherwise calculate the exponent
assign TmpExp = i==64 ? 0 : Bias + SubBits - LZResP;
@ -97,15 +124,21 @@ module fcvt (
////////////////////////////////////////////
// select the shift value and amount based on operation (to fp or int)
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
assign ShiftVal = FOpCtrlE[1] ? {{`XLEN-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : NormCnt;
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
//if shift = -1 then shift one bit right for round to nearest (shift over 2 never rounds)
// if the shift is negative add bit for sticky bit
// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
// if the shift is negative add a bit for sticky bit calculation
// otherwise shift left
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {115'b0, ~XZero} : ShiftVal << ShiftCnt;
assign ShiftedManTmp = &ShiftCnt ? {{`XLEN-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{`XLEN+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt;
// truncate the shifted mantissa
assign ShiftedMan = ShiftedManTmp[64+51:50];
// calculate sticky bit
// - take into account the possible right shift from before
// - the sticky bit calculation covers three different sizes depending on the operation
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
@ -126,33 +159,45 @@ module fcvt (
endcase
end
// don't round if the result is exact
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);
// round the shifted mantissa
assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
// fit the rounded result into the appropriate size and take the 2's complement if needed
assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
// extract the MSB and Sign for later use (will be used to determine underflow and overflow)
assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];
// Choose result
// double to unsigned long
// >2^64-1 or +inf or NaN - all 1's
// <0 or -inf - zero
// otherwise rounded result
//assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
// check if the result overflows
assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
// check if the result underflows (this calculation changes if the result is signed or unsigned)
assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
// calculate the result's sign
assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
// select the integer result
assign CvtIntRes = Of ? FOpCtrlE[2] ? SgnRes ? {32'b0, {32{1'b1}}}: {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} :
Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
Rounded[64-1:0];
assign CvtRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
assign CvtResE = FOpCtrlE[0] ? CvtRes : CvtIntRes;
// select the floating point result
assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
// select the result
assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
// calculate the flags
// - to int only sets the invalid flag
// - from int only sets the inexact flag
assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};

View File

@ -40,8 +40,7 @@ module fpu (
output logic [`XLEN-1:0] FIntResM,
output logic FDivBusyE, // Is the division/sqrt unit busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM, // FPU flags
output logic [`XLEN-1:0] FPUResultW); // FPU result
output logic [4:0] SetFflagsM); // FPU flags
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
generate
@ -79,6 +78,9 @@ module fpu (
logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW;
logic [63:0] ReadResW;
// add/cvt signals
logic [63:0] FAddResM, FAddResW;
logic [4:0] FAddFlgM, FAddFlgW;
@ -102,7 +104,7 @@ module fpu (
logic [63:0] ClassResE, ClassResM;
// 64-bit FPU result
logic [63:0] FPUResult64W;
logic [63:0] FPUResultW;
logic [4:0] FPUFlagsW;
@ -124,7 +126,7 @@ module fpu (
// regfile instantiation
fregfile fregfile (clk, reset, FWriteEnW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResult64W,
FPUResultW,
FRD1D, FRD2D, FRD3D);
@ -168,9 +170,9 @@ module fpu (
.ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE);
mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE);
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE);
// first of two-stage instance of floating-point fused multiply-add unit
@ -218,8 +220,7 @@ module fpu (
fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
// output for store instructions
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
//***swap to mux
mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE);
@ -265,8 +266,7 @@ module fpu (
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
//***change to mux
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
mux2 #(`XLEN) SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
@ -318,28 +318,10 @@ module fpu (
//#########################################
always_comb begin
case (FResultSelW)
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
3'b001 : FPUResult64W = FMAResW;
3'b010 : FPUResult64W = FAddResW;
3'b011 : FPUResult64W = FDivResultW;
3'b100 : FPUResult64W = FResW;
default : FPUResult64W = 64'bxxxxx;
endcase
end
mux2 #(64) ReadResMux({ReadDataW[31:0], 32'b0}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, ReadResW);
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
// interface between XLEN size datapath and double-precision sized
// floating-point results
//
// define offsets for LSB zero extension or truncation
always_comb begin
// zero extension
//***turn into mux
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
end
end else begin // no F_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;
@ -350,7 +332,6 @@ module fpu (
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
assign FPUResultW = 0;
end
endgenerate