mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-03 18:25:27 +00:00
fcvt.sv cleanup
This commit is contained in:
parent
0cc07fda1b
commit
a4bd128978
@ -1,95 +1,122 @@
|
||||
|
||||
// `include "wally-config.vh"
|
||||
`include "wally-config.vh"
|
||||
module fcvt (
|
||||
input logic [63:0] X,
|
||||
input logic [64-1:0] SrcAE,
|
||||
input logic [3:0] FOpCtrlE,
|
||||
input logic [2:0] FrmE,
|
||||
input logic FmtE,
|
||||
output logic [63:0] CvtResE,
|
||||
output logic [4:0] CvtFlgE);
|
||||
input logic [63:0] X, // floating point input
|
||||
input logic [`XLEN-1:0] SrcAE, // integer input
|
||||
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [63:0] CvtResE, // convert final result
|
||||
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
logic [10:0] XExp;
|
||||
logic [51:0] XFrac;
|
||||
logic XSgn;
|
||||
logic [10:0] ResExp,TmpExp;
|
||||
logic [51:0] ResFrac;
|
||||
logic ResSgn;
|
||||
logic [10:0] NormCnt;
|
||||
logic [11:0] Bias; // 1023 for double, 127 for single
|
||||
logic [7:0] Bits, SubBits;
|
||||
logic [64+51:0] ShiftedManTmp;
|
||||
logic [64+51:0] ShiftVal;
|
||||
logic [64+1:0] ShiftedMan;
|
||||
logic [64:0] RoundedTmp;
|
||||
logic [63:0] Rounded;
|
||||
logic [12:0] ExpVal, ShiftCnt;
|
||||
logic [64-1:0] PosInt;
|
||||
|
||||
logic [64-1:0] CvtIntRes;
|
||||
logic [63:0] CvtRes;
|
||||
logic XFracZero, Of,Uf;
|
||||
logic XExpMax;
|
||||
logic XNaN, XDenorm, XInf, XZero;
|
||||
logic Plus1,CalcPlus1, Guard, Round, LSB, Sticky;
|
||||
logic SgnRes, In64;
|
||||
logic Res64;
|
||||
logic RoundMSB;
|
||||
logic RoundSgn;
|
||||
logic XExpZero;
|
||||
logic XSgn; // FP input's sign
|
||||
logic [10:0] XExp; // FP input's exponent
|
||||
logic [51:0] XFrac; // FP input's fraction
|
||||
logic ResSgn; // FP result's sign
|
||||
logic [10:0] ResExp,TmpExp; // FP result's exponent
|
||||
logic [51:0] ResFrac; // FP result's fraction
|
||||
logic [5:0] LZResP; // lz output
|
||||
// logic LZResV;
|
||||
logic [11:0] Bias; // 1023 for double, 127 for single
|
||||
logic [7:0] Bits; // how many bits are in the integer result
|
||||
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
|
||||
logic [`XLEN+51:0] ShiftedManTmp; // Shifted mantissa
|
||||
logic [`XLEN+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|)
|
||||
logic [`XLEN+1:0] ShiftedMan; // shifted mantissa truncated
|
||||
logic [64:0] RoundedTmp; // full size rounded result - in case of overfow
|
||||
logic [63:0] Rounded; // rounded result
|
||||
logic [12:0] ExpVal; // unbiased X exponent
|
||||
logic [12:0] ShiftCnt; // how much is the mantissa shifted
|
||||
logic [`XLEN-1:0] IntIn; // trimed integer input
|
||||
logic [`XLEN-1:0] PosInt; // absolute value of the integer input
|
||||
logic [63:0] CvtIntRes; // interger result from the fp -> int instructions
|
||||
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
|
||||
logic XFracZero; // is the fraction of X zero?
|
||||
logic Of, Uf; // did the integer result underflow or overflow
|
||||
logic XExpZero; // is X's exponent zero
|
||||
logic XExpMax; // is the exponent all ones
|
||||
logic XNaN, XDenorm, XInf, XZero; // is X a special value
|
||||
logic Guard, Round, LSB, Sticky; // bits used to determine rounding
|
||||
logic Plus1,CalcPlus1; // do you add one for rounding
|
||||
logic SgnRes; // sign of the floating point result
|
||||
logic Res64, In64; // is the result or input 64 bits
|
||||
logic RoundMSB; // most significant bit of the fraction
|
||||
logic RoundSgn; // sign of the rounded result
|
||||
|
||||
// fcvt.w.s = 0010 -
|
||||
// fcvt.wu.s = 0110 -
|
||||
// fcvt.s.w = 0001
|
||||
// fcvt.s.wu = 0101
|
||||
// fcvt.l.s = 1010 -
|
||||
// fcvt.lu.s = 1110 -
|
||||
// fcvt.s.l = 1001
|
||||
// fcvt.s.lu = 1101
|
||||
// fcvt.w.d = 0010 -
|
||||
// fcvt.wu.d = 0110 -
|
||||
// fcvt.d.w = 0001
|
||||
// fcvt.d.wu = 0101
|
||||
// fcvt.l.d = 1010 -
|
||||
// fcvt.lu.d = 1110 -
|
||||
// fcvt.d.l = 1001 --
|
||||
// fcvt.d.lu = 1101 --
|
||||
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
|
||||
// FOpCtrlE:
|
||||
// fcvt.w.s = 0010
|
||||
// fcvt.wu.s = 0110
|
||||
// fcvt.s.w = 0001
|
||||
// fcvt.s.wu = 0101
|
||||
// fcvt.l.s = 1010
|
||||
// fcvt.lu.s = 1110
|
||||
// fcvt.s.l = 1001
|
||||
// fcvt.s.lu = 1101
|
||||
// fcvt.w.d = 0010
|
||||
// fcvt.wu.d = 0110
|
||||
// fcvt.d.w = 0001
|
||||
// fcvt.d.wu = 0101
|
||||
// fcvt.l.d = 1010
|
||||
// fcvt.lu.d = 1110
|
||||
// fcvt.d.l = 1001
|
||||
// fcvt.d.lu = 1101
|
||||
// {long, unsigned, to int, from int}
|
||||
|
||||
// split the input into its various parts
|
||||
assign XSgn = X[63];
|
||||
assign XExp = FmtE ? X[62:52] : {3'b0, X[62:55]};
|
||||
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
|
||||
|
||||
// determine if the exponent and fraction are all zero or ones
|
||||
assign XExpZero = ~|XExp;
|
||||
|
||||
assign XFracZero = ~|XFrac;
|
||||
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
|
||||
|
||||
// determine if X is a special value
|
||||
assign XNaN = XExpMax & ~XFracZero;
|
||||
assign XDenorm = XExpZero & ~XFracZero;
|
||||
assign XInf = XExpMax & XFracZero;
|
||||
assign XZero = XExpZero & XFracZero;
|
||||
|
||||
|
||||
// calculate signals based off the input and output's size
|
||||
assign Bias = FmtE ? 12'h3ff : 12'h7f;
|
||||
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
|
||||
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
|
||||
assign SubBits = In64 ? 8'd64 : 8'd32;
|
||||
assign Bits = Res64 ? 8'd64 : 8'd32;
|
||||
|
||||
// calculate the unbiased exponent
|
||||
assign ExpVal = XExp - Bias + XDenorm;
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
|
||||
logic [64-1:0] IntIn;
|
||||
// position the input in the most significant bits
|
||||
assign IntIn = FOpCtrlE[3] ? SrcAE : {SrcAE[31:0], 32'b0};
|
||||
// make the integer positive
|
||||
assign PosInt = IntIn[64-1]&~FOpCtrlE[2] ? -IntIn : IntIn;
|
||||
// determine the integer's sign
|
||||
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
|
||||
|
||||
// This did not work \/
|
||||
// generate
|
||||
// if(`XLEN == 64)
|
||||
// lz64 lz(LZResP, LZResV, PosInt);
|
||||
// else if(`XLEN == 32) begin
|
||||
// assign LZResP[5] = 1'b0;
|
||||
// lz32 lz(LZResP[4:0], LZResV, PosInt);
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// Leading one detector
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~PosInt[64-1-i] && i <= 64) i = i+1; // search for leading one
|
||||
NormCnt = i+1; // compute shift count
|
||||
LZResP = i+1; // compute shift count
|
||||
end
|
||||
assign TmpExp = i==64 ? 0 : Bias + SubBits - NormCnt;
|
||||
|
||||
// if no one was found set to zero otherwise calculate the exponent
|
||||
assign TmpExp = i==64 ? 0 : Bias + SubBits - LZResP;
|
||||
|
||||
|
||||
|
||||
@ -97,15 +124,21 @@ module fcvt (
|
||||
////////////////////////////////////////////
|
||||
|
||||
|
||||
// select the shift value and amount based on operation (to fp or int)
|
||||
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
|
||||
assign ShiftVal = FOpCtrlE[1] ? {{`XLEN-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
|
||||
|
||||
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : NormCnt;
|
||||
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
|
||||
//if shift = -1 then shift one bit right for round to nearest (shift over 2 never rounds)
|
||||
// if the shift is negative add a bit for the sticky bit
|
||||
// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
|
||||
// if the shift is negative add a bit for sticky bit calculation
|
||||
// otherwise shift left
|
||||
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {115'b0, ~XZero} : ShiftVal << ShiftCnt;
|
||||
assign ShiftedManTmp = &ShiftCnt ? {{`XLEN-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{`XLEN+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt;
|
||||
|
||||
// truncate the shifted mantissa
|
||||
assign ShiftedMan = ShiftedManTmp[64+51:50];
|
||||
|
||||
// calculate sticky bit
|
||||
// - take into account the possible right shift from before
|
||||
// - the sticky bit calculation covers three different sizes depending on the operation
|
||||
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
|
||||
|
||||
|
||||
@ -126,33 +159,45 @@ module fcvt (
|
||||
endcase
|
||||
end
|
||||
|
||||
// don't round if the result is exact
|
||||
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);
|
||||
|
||||
// round the shifted mantissa
|
||||
assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
|
||||
assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
|
||||
|
||||
// fit the rounded result into the appropriate size and take the 2's complement if needed
|
||||
assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
|
||||
XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
|
||||
|
||||
// extract the MSB and Sign for later use (will be used to determine underflow and overflow)
|
||||
assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
|
||||
assign RoundSgn = Res64 ? Rounded[63] : Rounded[31];
|
||||
|
||||
|
||||
|
||||
// Choose result
|
||||
// double to unsigned long
|
||||
// >2^64-1 or +inf or NaN - all 1's
|
||||
// <0 or -inf - zero
|
||||
// otherwise rounded result
|
||||
//assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
|
||||
// check if the result overflows
|
||||
assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
|
||||
|
||||
// check if the result underflows (this calculation changes if the result is signed or unsigned)
|
||||
assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
|
||||
|
||||
// calculate the result's sign
|
||||
assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
|
||||
|
||||
// select the integer result
|
||||
assign CvtIntRes = Of ? FOpCtrlE[2] ? SgnRes ? {32'b0, {32{1'b1}}}: {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} :
|
||||
Uf ? FOpCtrlE[2] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
|
||||
Rounded[64-1:0];
|
||||
|
||||
assign CvtRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
|
||||
assign CvtResE = FOpCtrlE[0] ? CvtRes : CvtIntRes;
|
||||
|
||||
// select the floating point result
|
||||
assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {ResSgn, ResExp[7:0], ResFrac, 3'b0};
|
||||
|
||||
// select the result
|
||||
assign CvtResE = FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
|
||||
|
||||
// calculate the flags
|
||||
// - to int only sets the invalid flag
|
||||
// - from int only sets the inexact flag
|
||||
assign CvtFlgE = {(Of | Uf)&FOpCtrlE[1], 3'b0, (Guard|Round|Sticky)&FOpCtrlE[0]};
|
||||
|
||||
|
||||
|
@ -40,8 +40,7 @@ module fpu (
|
||||
output logic [`XLEN-1:0] FIntResM,
|
||||
output logic FDivBusyE, // Is the divison/sqrt unit busy
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic [4:0] SetFflagsM, // FPU flags
|
||||
output logic [`XLEN-1:0] FPUResultW); // FPU result
|
||||
output logic [4:0] SetFflagsM); // FPU result
|
||||
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
|
||||
|
||||
generate
|
||||
@ -79,6 +78,9 @@ module fpu (
|
||||
logic [63:0] FMAResM, FMAResW;
|
||||
logic [4:0] FMAFlgM, FMAFlgW;
|
||||
|
||||
|
||||
logic [63:0] ReadResW;
|
||||
|
||||
// add/cvt signals
|
||||
logic [63:0] FAddResM, FAddResW;
|
||||
logic [4:0] FAddFlgM, FAddFlgW;
|
||||
@ -102,7 +104,7 @@ module fpu (
|
||||
logic [63:0] ClassResE, ClassResM;
|
||||
|
||||
// 64-bit FPU result
|
||||
logic [63:0] FPUResult64W;
|
||||
logic [63:0] FPUResultW;
|
||||
logic [4:0] FPUFlagsW;
|
||||
|
||||
|
||||
@ -124,7 +126,7 @@ module fpu (
|
||||
// regfile instantiation
|
||||
fregfile fregfile (clk, reset, FWriteEnW,
|
||||
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
|
||||
FPUResult64W,
|
||||
FPUResultW,
|
||||
FRD1D, FRD2D, FRD3D);
|
||||
|
||||
|
||||
@ -168,9 +170,9 @@ module fpu (
|
||||
.ForwardXE, .ForwardYE, .ForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
|
||||
mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE);
|
||||
mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE);
|
||||
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE);
|
||||
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE);
|
||||
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE);
|
||||
|
||||
|
||||
// first of two-stage instance of floating-point fused multiply-add unit
|
||||
@ -218,8 +220,7 @@ module fpu (
|
||||
fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
|
||||
|
||||
// output for store instructions
|
||||
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
|
||||
//***swap to mux
|
||||
mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE);
|
||||
|
||||
|
||||
|
||||
@ -265,8 +266,7 @@ module fpu (
|
||||
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
|
||||
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
|
||||
|
||||
//***change to mux
|
||||
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
|
||||
mux2 #(`XLEN) SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned);
|
||||
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
|
||||
|
||||
@ -318,28 +318,10 @@ module fpu (
|
||||
//#########################################
|
||||
|
||||
|
||||
mux2 #(64) ReadResMux({ReadDataW[31:0], 32'b0}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, ReadResW);
|
||||
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
|
||||
|
||||
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
|
||||
3'b001 : FPUResult64W = FMAResW;
|
||||
3'b010 : FPUResult64W = FAddResW;
|
||||
3'b011 : FPUResult64W = FDivResultW;
|
||||
3'b100 : FPUResult64W = FResW;
|
||||
default : FPUResult64W = 64'bxxxxx;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
// interface between XLEN size datapath and double-precision sized
|
||||
// floating-point results
|
||||
//
|
||||
// define offsets for LSB zero extension or truncation
|
||||
always_comb begin
|
||||
// zero extension
|
||||
//***turn into mux
|
||||
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
|
||||
end
|
||||
end else begin // no F_SUPPORTED; tie outputs low
|
||||
assign FStallD = 0;
|
||||
assign FWriteIntE = 0;
|
||||
@ -350,7 +332,6 @@ module fpu (
|
||||
assign FDivBusyE = 0;
|
||||
assign IllegalFPUInstrD = 1;
|
||||
assign SetFflagsM = 0;
|
||||
assign FPUResultW = 0;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user