forked from Github_Repos/cvw
moved ctrl signal registers into fctrl, also a lot of code cleaning
This commit is contained in:
parent
d7e90a7086
commit
fb890d621d
@ -34,20 +34,20 @@ module divsqrt(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [`NF:0] XManE, YManE,
|
||||
input logic [`NE-1:0] XExpE, YExpE,
|
||||
input logic [`NF:0] XmE, YmE,
|
||||
input logic [`NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic DivStartE,
|
||||
input logic StallM,
|
||||
input logic StallE,
|
||||
output logic DivStickyM,
|
||||
input logic StallE,
|
||||
output logic DivSM,
|
||||
output logic DivBusy,
|
||||
output logic DivDone,
|
||||
output logic [`NE+1:0] DivCalcExpM,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`DURLEN-1:0] EarlyTermShiftM,
|
||||
output logic [`QLEN-1-(`RADIX/4):0] QuotM
|
||||
output logic [`QLEN-1-(`RADIX/4):0] QmM
|
||||
// output logic [`XLEN-1:0] RemM,
|
||||
);
|
||||
|
||||
@ -60,10 +60,10 @@ module divsqrt(
|
||||
logic [`DURLEN-1:0] Dur;
|
||||
logic NegSticky;
|
||||
|
||||
srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
|
||||
srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
|
||||
|
||||
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
|
||||
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
|
||||
.StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
|
||||
srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
|
||||
.StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
|
||||
srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
|
||||
.StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM);
|
||||
endmodule
|
@ -29,29 +29,29 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fclassify (
|
||||
input logic XSgnE, // sign bit
|
||||
input logic XNaNE, // is NaN
|
||||
input logic XSNaNE, // is signaling NaN
|
||||
input logic XDenormE, // is denormal
|
||||
input logic XZeroE, // is zero
|
||||
input logic XInfE, // is infinity
|
||||
output logic [`XLEN-1:0] ClassResE // classify result
|
||||
);
|
||||
input logic Xs, // sign bit
|
||||
input logic XNaN, // is NaN
|
||||
input logic XSNaN, // is signaling NaN
|
||||
input logic XDenorm,// is denormal
|
||||
input logic XZero, // is zero
|
||||
input logic XInf, // is infinity
|
||||
output logic [`XLEN-1:0] ClassRes// classify result
|
||||
);
|
||||
|
||||
logic PInf, PZero, PNorm, PDenorm;
|
||||
logic NInf, NZero, NNorm, NDenorm;
|
||||
logic XNormE;
|
||||
logic XNorm;
|
||||
|
||||
// determine the sub categories
|
||||
assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE);
|
||||
assign PInf = ~XSgnE&XInfE;
|
||||
assign NInf = XSgnE&XInfE;
|
||||
assign PNorm = ~XSgnE&XNormE;
|
||||
assign NNorm = XSgnE&XNormE;
|
||||
assign PDenorm = ~XSgnE&XDenormE;
|
||||
assign NDenorm = XSgnE&XDenormE;
|
||||
assign PZero = ~XSgnE&XZeroE;
|
||||
assign NZero = XSgnE&XZeroE;
|
||||
assign XNorm= ~(XNaN | XInf| XDenorm| XZero);
|
||||
assign PInf = ~Xs&XInf;
|
||||
assign NInf = Xs&XInf;
|
||||
assign PNorm = ~Xs&XNorm;
|
||||
assign NNorm = Xs&XNorm;
|
||||
assign PDenorm = ~Xs&XDenorm;
|
||||
assign NDenorm = Xs&XDenorm;
|
||||
assign PZero = ~Xs&XZero;
|
||||
assign NZero = Xs&XZero;
|
||||
|
||||
// determine sub category and combine into the result
|
||||
// bit 0 - -Inf
|
||||
@ -64,6 +64,6 @@ module fclassify (
|
||||
// bit 7 - +Inf
|
||||
// bit 8 - signaling NaN
|
||||
// bit 9 - quiet NaN
|
||||
assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
|
||||
endmodule
|
||||
|
@ -27,9 +27,10 @@
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// FOpCtrlE values
|
||||
// OpCtrl values
|
||||
// 110 min
|
||||
// 101 max
|
||||
// 010 equal
|
||||
@ -37,36 +38,32 @@
|
||||
// 011 less than or equal
|
||||
|
||||
module fcmp (
|
||||
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlE, // see above table
|
||||
input logic XSgnE, YSgnE, // input signs
|
||||
input logic [`NE-1:0] XExpE, YExpE, // input exponents
|
||||
input logic [`NF:0] XManE, YManE, // input mantissa
|
||||
input logic XZeroE, YZeroE, // is zero
|
||||
input logic XNaNE, YNaNE, // is NaN
|
||||
input logic XSNaNE, YSNaNE, // is signaling NaN
|
||||
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
|
||||
output logic CmpNVE, // invalid flag
|
||||
output logic [`FLEN-1:0] CmpFpResE, // compare resilt
|
||||
output logic [`XLEN-1:0] CmpIntResE // compare resilt
|
||||
input logic [`FMTBITS-1:0] Fmt, // format of fp number
|
||||
input logic [2:0] OpCtrl, // see above table
|
||||
input logic Xs, Ys, // input signs
|
||||
input logic [`NE-1:0] Xe, Ye, // input exponents
|
||||
input logic [`NF:0] Xm, Ym, // input mantissa
|
||||
input logic XZero, YZero, // is zero
|
||||
input logic XNaN, YNaN, // is NaN
|
||||
input logic XSNaN, YSNaN, // is signaling NaN
|
||||
input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
|
||||
output logic CmpNV, // invalid flag
|
||||
output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
|
||||
output logic [`XLEN-1:0] CmpIntRes // compare integer result
|
||||
);
|
||||
|
||||
logic LTabs, LT, EQ; // is X < or > or = Y
|
||||
logic [`FLEN-1:0] NaNRes;
|
||||
logic BothZero, EitherNaN, EitherSNaN;
|
||||
logic LTabs, LT, EQ; // is X < or > or = Y
|
||||
logic [`FLEN-1:0] NaNRes; // NaN result
|
||||
logic BothZero; // are both inputs zero
|
||||
logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
|
||||
|
||||
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
|
||||
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
|
||||
// assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression
|
||||
assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
|
||||
assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
|
||||
assign EQ = (X == Y);
|
||||
|
||||
//assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]});
|
||||
//assign LT = XInt < YInt;
|
||||
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
|
||||
assign EQ = (FSrcXE == FSrcYE);
|
||||
|
||||
assign BothZero = XZeroE&YZeroE;
|
||||
assign EitherNaN = XNaNE|YNaNE;
|
||||
assign EitherSNaN = XSNaNE|YSNaNE;
|
||||
assign BothZero = XZero&YZero;
|
||||
assign EitherNaN = XNaN|YNaN;
|
||||
assign EitherSNaN = XSNaN|YSNaN;
|
||||
|
||||
|
||||
// flags
|
||||
@ -74,78 +71,91 @@ module fcmp (
|
||||
// LT/LE - signaling - sets invalid if NaN input
|
||||
// EQ - quiet - sets invalid if signaling NaN input
|
||||
always_comb begin
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b110: CmpNVE = EitherSNaN;//min
|
||||
3'b101: CmpNVE = EitherSNaN;//max
|
||||
3'b010: CmpNVE = EitherSNaN;//equal
|
||||
3'b001: CmpNVE = EitherNaN;//less than
|
||||
3'b011: CmpNVE = EitherNaN;//less than or equal
|
||||
default: CmpNVE = 1'bx;
|
||||
case (OpCtrl[2:0])
|
||||
3'b110: CmpNV = EitherSNaN;//min
|
||||
3'b101: CmpNV = EitherSNaN;//max
|
||||
3'b010: CmpNV = EitherSNaN;//equal
|
||||
3'b001: CmpNV = EitherNaN;//less than
|
||||
3'b011: CmpNV = EitherNaN;//less than or equal
|
||||
default: CmpNV = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
// Min/Max
|
||||
// - outputs the min/max of X and Y
|
||||
// - -0 < 0
|
||||
// - if both are NaN return quiet X
|
||||
// - if one is a NaN output the non-NaN
|
||||
// LT/LE/EQ
|
||||
// - -0 = 0
|
||||
// - inf = inf and -inf = -inf
|
||||
// - return 0 if comparison with NaN (unordered)
|
||||
|
||||
// fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
|
||||
// for IEEE, return the payload of X
|
||||
// for RISC-V, return the canonical NaN
|
||||
|
||||
|
||||
// select the NaN result
|
||||
if (`FPSIZES == 1)
|
||||
if(`IEEE754) assign NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
|
||||
if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
|
||||
else if (`FPSIZES == 2)
|
||||
if(`IEEE754) assign NaNRes = FmtE ? {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
|
||||
else assign NaNRes = FmtE ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
|
||||
else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
|
||||
else if (`FPSIZES == 3)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
`FMT:
|
||||
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
|
||||
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
`FMT1:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
|
||||
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
`FMT2:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
|
||||
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
default: NaNRes = {`FLEN{1'bx}};
|
||||
endcase
|
||||
|
||||
else if (`FPSIZES == 4)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
2'h3:
|
||||
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
|
||||
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
|
||||
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
2'h1:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, XSgnE, {`D_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`D_NF]};
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
|
||||
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
2'h0:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, XSgnE, {`S_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`S_NF]};
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
|
||||
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
2'h2:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, XSgnE, {`H_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`H_NF]};
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
|
||||
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
endcase
|
||||
|
||||
// when one input is a NaN -output the non-NaN
|
||||
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
|
||||
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
|
||||
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
|
||||
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
|
||||
|
||||
|
||||
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
|
||||
// Min/Max
|
||||
// - outputs the min/max of X and Y
|
||||
// - -0 < 0
|
||||
// - if both are NaN return quiet X
|
||||
// - if one is a NaN output the non-NaN
|
||||
always_comb
|
||||
if(OpCtrl[0]) // MAX
|
||||
if(XNaN)
|
||||
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
|
||||
else CmpFpRes = Y; // X = NaN Y != NaN
|
||||
else
|
||||
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
|
||||
else // X,Y != NaN
|
||||
if(LT) CmpFpRes = Y; // X < Y
|
||||
else CmpFpRes = X; // X > Y
|
||||
else // MIN
|
||||
if(XNaN)
|
||||
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
|
||||
else CmpFpRes = Y; // X = NaN Y != NaN
|
||||
else
|
||||
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
|
||||
else // X,Y != NaN
|
||||
if(LT) CmpFpRes = X; // X < Y
|
||||
else CmpFpRes = Y; // X > Y
|
||||
|
||||
// LT/LE/EQ
|
||||
// - -0 = 0
|
||||
// - inf = inf and -inf = -inf
|
||||
// - return 0 if comparison with NaN (unordered)
|
||||
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
|
||||
|
||||
endmodule
|
||||
|
@ -29,25 +29,41 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fctrl (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic StallE, StallM, StallW, // stall signals
|
||||
input logic FlushE, FlushM, FlushW, // flush signals
|
||||
input logic [31:0] InstrD,
|
||||
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision
|
||||
input logic [6:0] OpD, // bits 6:0 of instruction
|
||||
input logic [4:0] Rs2D, // bits 24:20 of instruction
|
||||
input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode
|
||||
input logic [2:0] FRM_REGW, // rounding mode from CSR
|
||||
input logic [1:0] STATUS_FS, // is FPU enabled?
|
||||
input logic FDivBusyE, // is the divider busy
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic FRegWriteD, // FP register write enable
|
||||
output logic FDivStartD, // Start division or squareroot
|
||||
output logic [1:0] FResSelD, // select result to be written to fp register
|
||||
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
|
||||
output logic [1:0] PostProcSelD,
|
||||
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
|
||||
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
output logic FWriteIntD // is the result written to the integer register
|
||||
output logic FRegWriteM, FRegWriteW, // FP register write enable
|
||||
output logic [2:0] FrmM, // FP rounding mode
|
||||
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic DivStartE, // Start division or squareroot
|
||||
output logic FWriteIntE, FWriteIntM, // Write to integer register
|
||||
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
|
||||
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
|
||||
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
|
||||
output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input
|
||||
);
|
||||
|
||||
`define FCTRLW 11
|
||||
logic [`FCTRLW-1:0] ControlsD;
|
||||
logic FRegWriteD; // FP register write enable
|
||||
logic DivStartD; // integer register write enable
|
||||
logic FWriteIntD; // integer register write enable
|
||||
logic FRegWriteE; // FP register write enable
|
||||
logic [2:0] OpCtrlD; // Select which opperation to do in each component
|
||||
logic [1:0] PostProcSelD; // select result in the post processing unit
|
||||
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
|
||||
logic [2:0] FrmD, FrmE; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtD; // FP format
|
||||
//*** will putting x for don't cares reduce area in synthisis???
|
||||
// FPU Instruction Decoder
|
||||
always_comb
|
||||
@ -130,7 +146,7 @@ module fctrl (
|
||||
endcase
|
||||
|
||||
// unswizzle control bits
|
||||
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
|
||||
// rounding modes:
|
||||
// 000 - round to nearest, ties to even
|
||||
@ -168,7 +184,7 @@ module fctrl (
|
||||
// 10 fma
|
||||
|
||||
// Other Sel:
|
||||
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
|
||||
// Ctrl signal = {OpCtrl[2], &FOpctrl[1:0]}
|
||||
// 000 - sign 00
|
||||
// 001 - negate sign 00
|
||||
// 010 - xor sign 00
|
||||
@ -205,5 +221,20 @@ module fctrl (
|
||||
// 01 - negate sign
|
||||
// 10 - xor sign
|
||||
|
||||
// D/E pipleine register
|
||||
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
|
||||
// E/M pipleine register
|
||||
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
|
||||
// M/W pipleine register
|
||||
flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResSelM},
|
||||
{FRegWriteW, FResSelW});
|
||||
|
||||
endmodule
|
||||
|
@ -35,7 +35,7 @@ module fcvt (
|
||||
input logic [`NE-1:0] Xe, // input's exponent
|
||||
input logic [`NF:0] Xm, // input's fraction
|
||||
input logic [`XLEN-1:0] Int, // integer input - from IEU
|
||||
input logic [2:0] FOpCtrl, // choose which opperation (look below for values)
|
||||
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic ToInt, // is fp->int (since it's writting to the integer register)
|
||||
input logic XZero, // is the input zero
|
||||
input logic XDenorm, // is the input denormalized
|
||||
@ -73,17 +73,17 @@ module fcvt (
|
||||
|
||||
|
||||
// seperate OpCtrl for code readability
|
||||
assign Signed = FOpCtrl[0];
|
||||
assign Int64 = FOpCtrl[1];
|
||||
assign IntToFp = FOpCtrl[2];
|
||||
assign Signed = OpCtrl[0];
|
||||
assign Int64 = OpCtrl[1];
|
||||
assign IntToFp = OpCtrl[2];
|
||||
|
||||
// choose the ouptut format depending on the opperation
|
||||
// - fp -> fp: OpCtrl contains the percision of the output
|
||||
// - int -> fp: Fmt contains the percision of the output
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT);
|
||||
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0];
|
||||
assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
@ -31,20 +31,20 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fhazard(
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
|
||||
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
|
||||
input logic [4:0] RdM, RdW, // the adress being written to
|
||||
input logic [1:0] FResSelM, // the result being selected
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
|
||||
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
|
||||
input logic [4:0] RdM, RdW, // the adress being written to
|
||||
input logic [1:0] FResSelM, // the result being selected
|
||||
output logic FStallD, // stall the decode stage
|
||||
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
|
||||
output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value
|
||||
);
|
||||
|
||||
|
||||
always_comb begin
|
||||
// set defaults
|
||||
FForwardXE = 2'b00; // choose FRD1E
|
||||
FForwardYE = 2'b00; // choose FRD2E
|
||||
FForwardZE = 2'b00; // choose FRD3E
|
||||
ForwardXE = 2'b00; // choose FRD1E
|
||||
ForwardYE = 2'b00; // choose FRD2E
|
||||
ForwardZE = 2'b00; // choose FRD3E
|
||||
FStallD = 0;
|
||||
|
||||
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
|
||||
@ -52,28 +52,28 @@ module fhazard(
|
||||
// if the needed value is in the memory stage - input 1
|
||||
if ((Adr1E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
// if the needed value is in the memory stage - input 2
|
||||
if ((Adr2E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
// if the needed value is in the memory stage - input 3
|
||||
if ((Adr3E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
||||
end
|
||||
|
||||
|
@ -34,7 +34,7 @@ module fma(
|
||||
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
|
||||
input logic XZero, YZero, ZZero, // is the input zero
|
||||
input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad
|
||||
output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
|
||||
output logic ZmSticky, // sticky bit that is calculated during alignment
|
||||
@ -46,7 +46,7 @@ module fma(
|
||||
output logic Ps, // the product's sign
|
||||
output logic Ss, // the sum's sign
|
||||
output logic [`NE+1:0] Se,
|
||||
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count
|
||||
output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count
|
||||
);
|
||||
|
||||
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
|
||||
@ -72,7 +72,7 @@ module fma(
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// calculate the signs and take the opperation into account
|
||||
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
|
||||
sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
|
||||
|
||||
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
|
||||
.Am, .ZmSticky, .KillProd);
|
||||
@ -85,7 +85,7 @@ module fma(
|
||||
|
||||
add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
|
||||
|
||||
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
|
||||
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt);
|
||||
endmodule
|
||||
|
||||
|
||||
@ -120,7 +120,7 @@ endmodule
|
||||
|
||||
|
||||
module sign(
|
||||
input logic [2:0] FOpCtrl, // opperation contol
|
||||
input logic [2:0] OpCtrl, // opperation contol
|
||||
input logic Xs, Ys, Zs, // sign of the inputs
|
||||
output logic Ps, // the product's sign - takes opperation into account
|
||||
output logic As // aligned addend sign used in fma - takes opperation into account
|
||||
@ -130,9 +130,9 @@ module sign(
|
||||
// Negate product's sign if FNMADD or FNMSUB
|
||||
|
||||
// flip is negation opperation
|
||||
assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
|
||||
assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
|
||||
// flip if subtraction
|
||||
assign As = Zs^FOpCtrl[0];
|
||||
assign As = Zs^OpCtrl[0];
|
||||
|
||||
endmodule
|
||||
|
||||
@ -275,7 +275,7 @@ endmodule
|
||||
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
|
||||
input logic [3*`NF+6:0] A, // addend
|
||||
input logic [2*`NF+3:0] P, // product
|
||||
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result
|
||||
output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result
|
||||
);
|
||||
|
||||
logic [3*`NF+6:0] T;
|
||||
@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
|
||||
|
||||
|
||||
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt));
|
||||
|
||||
endmodule
|
||||
|
@ -32,7 +32,7 @@ module fmashiftcalc(
|
||||
input logic [3*`NF+5:0] FmaSm, // the positive sum
|
||||
input logic [`NE-1:0] Ze, // exponent of Z
|
||||
input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count
|
||||
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic FmaKillProd, // is the product set to zero
|
||||
input logic [`NE+1:0] FmaSe,
|
||||
@ -52,7 +52,7 @@ module fmashiftcalc(
|
||||
// Determine if the sum is zero
|
||||
assign FmaSZero = ~(|FmaSm);
|
||||
// calculate the sum's exponent
|
||||
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
|
||||
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4);
|
||||
|
||||
//convert the sum's exponent into the proper percision
|
||||
if (`FPSIZES == 1) begin
|
||||
@ -152,7 +152,7 @@ module fmashiftcalc(
|
||||
// - shift once if killing a product and the result is denormalized
|
||||
assign FmaShiftIn = {3'b0, FmaSm};
|
||||
if (`FPSIZES == 1)
|
||||
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1;
|
||||
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
|
||||
else
|
||||
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1;
|
||||
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1;
|
||||
endmodule
|
||||
|
@ -30,28 +30,28 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpu (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [2:0] FRM_REGW, // Rounding mode from CSR
|
||||
input logic [31:0] InstrD, // instruction from IFU
|
||||
input logic [`FLEN-1:0] ReadDataW,// Read data from memory
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU)
|
||||
input logic StallE, StallM, StallW, // stall signals from HZU
|
||||
input logic FlushE, FlushM, FlushW, // flush signals from HZU
|
||||
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
|
||||
input logic [1:0] STATUS_FS, // Is floating-point enabled?
|
||||
output logic FRegWriteM, // FP register write enable
|
||||
output logic FpLoadStoreM, // Fp load instruction?
|
||||
output logic FStore2,
|
||||
output logic FStallD, // Stall the decode stage
|
||||
output logic FWriteIntE, // integer register write enables
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
|
||||
output logic [1:0] FResSelW,
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
|
||||
input logic [31:0] InstrD, // instruction (from IFU)
|
||||
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU)
|
||||
input logic StallE, StallM, StallW, // stall signals (from HZU)
|
||||
input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
|
||||
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
|
||||
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
|
||||
output logic FRegWriteM, // FP register write enable (to privileged unit)
|
||||
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
|
||||
output logic FStore2, // store two words into memory (to LSU)
|
||||
output logic FStallD, // Stall the decode stage (To HZU)
|
||||
output logic FWriteIntE, // integer register write enable (to IEU)
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
output logic [1:0] FResSelW, // final result selection (to IEU)
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
|
||||
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
|
||||
);
|
||||
|
||||
@ -62,99 +62,88 @@ module fpu (
|
||||
// - sets the underflow after rounding
|
||||
|
||||
// control signals
|
||||
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division or squareroot
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
|
||||
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
|
||||
logic FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmM; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic DivStartE; // Start division or squareroot
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
|
||||
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
|
||||
|
||||
// regfile signals
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
|
||||
logic XSgnM, YSgnM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
|
||||
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
|
||||
logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XNaNQ, YNaNQ; // is the input a NaN - divide
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
|
||||
logic XZeroQ, YZeroQ; // is the input zero - divide
|
||||
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XInfQ, YInfQ; // is the input infinity - divide
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic FmtQ;
|
||||
logic FOpCtrlQ;
|
||||
logic XsE, YsE, ZsE; // input's sign - execute stage
|
||||
logic XsM, YsM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage
|
||||
logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XNaNQ, YNaNQ; // is the input a NaN - divide
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
|
||||
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
|
||||
// Fma Signals
|
||||
logic [3*`NF+5:0] SumE, SumM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic [`NE+1:0] SeE,SeM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvAE, InvAM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic ZSgnEffE, ZSgnEffM;
|
||||
logic PSgnE, PSgnM;
|
||||
logic SsE, SsM;
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
|
||||
logic [3*`NF+5:0] SmE, SmM;
|
||||
logic [`NE+1:0] PeE, PeM;
|
||||
logic ZmStickyE, ZmStickyM;
|
||||
logic [`NE+1:0] SeE,SeM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvAE, InvAM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic AsE, AsM;
|
||||
logic PsE, PsM;
|
||||
logic SsE, SsM;
|
||||
logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM;
|
||||
|
||||
// Cvt Signals
|
||||
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic [`NE:0] CeE, CeM; // the calculated expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
|
||||
logic CvtResSgnE, CvtResSgnM; // the result's sign
|
||||
logic CsE, CsM; // the result's sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
|
||||
//divide signals
|
||||
logic [`QLEN-1-(`RADIX/4):0] QuotM;
|
||||
logic [`NE+1:0] DivCalcExpE, DivCalcExpM;
|
||||
logic DivStickyE, DivStickyM;
|
||||
logic DivDoneM;
|
||||
logic [`DURLEN-1:0] EarlyTermShiftM;
|
||||
logic [`QLEN-1-(`RADIX/4):0] QmM;
|
||||
logic [`NE+1:0] QeE, QeM;
|
||||
logic DivSE, DivSM;
|
||||
logic DivDoneM;
|
||||
logic [`DURLEN-1:0] EarlyTermShiftM;
|
||||
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
logic [4:0] FDivFlgM; // divide/squareroot flags
|
||||
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`XLEN-1:0] FIntResE; // classify result
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // classify result
|
||||
logic [`FLEN-1:0] PostProcResM; // classify result
|
||||
logic [4:0] PostProcFlgM; // classify result
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`XLEN-1:0] FIntResE; // classify result
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // classify result
|
||||
logic [`FLEN-1:0] PostProcResM; // classify result
|
||||
logic [4:0] PostProcFlgM; // classify result
|
||||
logic [`XLEN-1:0] FCvtIntResM;
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
|
||||
// other signals
|
||||
logic FDivSqrtDoneE; // is divide done
|
||||
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
|
||||
logic load_preload; // enable for FF on fpdivsqrt
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
|
||||
@ -171,9 +160,11 @@ module fpu (
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate FP control signals
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
|
||||
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
|
||||
.FmtD, .FrmD, .FWriteIntD);
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
|
||||
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
|
||||
.reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
|
||||
.DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM,
|
||||
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
|
||||
|
||||
// FP register file
|
||||
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
@ -185,12 +176,6 @@ module fpu (
|
||||
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
|
||||
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
|
||||
|
||||
// EXECUTION STAGE
|
||||
|
||||
@ -207,12 +192,12 @@ module fpu (
|
||||
// Hazard unit for FPU
|
||||
// - determines if any forwarding or stalls are needed
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
|
||||
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
|
||||
.FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE);
|
||||
|
||||
|
||||
generate
|
||||
@ -227,7 +212,7 @@ module fpu (
|
||||
endgenerate
|
||||
|
||||
|
||||
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
|
||||
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions
|
||||
|
||||
// Force Z to be 0 for multiply instructions
|
||||
generate
|
||||
@ -241,55 +226,76 @@ module fpu (
|
||||
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
endgenerate
|
||||
|
||||
mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
|
||||
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE);
|
||||
|
||||
// unpack unit
|
||||
// - splits FP inputs into their various parts
|
||||
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
|
||||
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
|
||||
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
|
||||
.Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
|
||||
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE),
|
||||
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE),
|
||||
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
|
||||
.ZInf(ZInfE), .XExpMax(XExpMaxE));
|
||||
|
||||
// fma - does multiply, add, and multiply-add instructions
|
||||
fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE),
|
||||
.Xe(XExpE), .Ye(YExpE), .Ze(ZExpE),
|
||||
.Xm(XManE), .Ym(YManE), .Zm(ZManE),
|
||||
// fused multiply add
|
||||
// - fadd/fsub
|
||||
// - fmul
|
||||
// - fmadd/fnmadd/fmsub/fnmsub
|
||||
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE),
|
||||
.Xe(XeE), .Ye(YeE), .Ze(ZeE),
|
||||
.Xm(XmE), .Ym(YmE), .Zm(ZmE),
|
||||
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
|
||||
.FOpCtrl(FOpCtrlE), .Fmt(FmtE),
|
||||
.As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE),
|
||||
.Sm(SumE), .Pe(ProdExpE),
|
||||
.NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE),
|
||||
.ZmSticky(AddendStickyE), .KillProd(KillProdE));
|
||||
.OpCtrl(OpCtrlE), .Fmt(FmtE),
|
||||
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
|
||||
.Sm(SmE), .Pe(PeE),
|
||||
.NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE),
|
||||
.ZmSticky(ZmStickyE), .KillProd(KillProdE));
|
||||
|
||||
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
|
||||
.StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
|
||||
.EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
|
||||
// other FP execution units
|
||||
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
|
||||
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
|
||||
// divide and squareroot
|
||||
// - fdiv
|
||||
// - fsqrt
|
||||
// *** add other opperations
|
||||
divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE,
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE),
|
||||
.StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
|
||||
.EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
|
||||
// compare
|
||||
// - fmin/fmax
|
||||
// - flt/fle/feq
|
||||
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
|
||||
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
|
||||
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
|
||||
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
|
||||
// sign injection
|
||||
// - fsgnj/fsgnjx/fsgnjn
|
||||
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
|
||||
|
||||
fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE),
|
||||
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE),
|
||||
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE),
|
||||
// classify
|
||||
// - fclass
|
||||
fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE),
|
||||
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
|
||||
|
||||
// convert
|
||||
// - fcvt.*.*
|
||||
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
|
||||
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE),
|
||||
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE),
|
||||
.LzcIn(CvtLzcInE));
|
||||
|
||||
// data to be stored in memory - to IEU
|
||||
// - FP uses NaN-blocking format
|
||||
// - if there are any unsused bits the most significant bits are filled with 1s
|
||||
if (`LLEN==`XLEN) begin
|
||||
assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
assign FWriteDataE = YE[`XLEN-1:0];
|
||||
end else begin
|
||||
logic [`FLEN-1:0] FWriteDataE;
|
||||
if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
|
||||
else assign FStore2 = FmtM;
|
||||
|
||||
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
|
||||
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
|
||||
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
|
||||
if (`FPSIZES==1) assign FWriteDataE = YE;
|
||||
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
|
||||
else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
|
||||
|
||||
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
|
||||
end
|
||||
@ -306,14 +312,14 @@ module fpu (
|
||||
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
endgenerate
|
||||
// select a result that may be written to the FP register
|
||||
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
|
||||
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
|
||||
|
||||
// select the result that may be written to the integer register - to IEU
|
||||
if (`FLEN>`XLEN)
|
||||
assign IntSrcXE = FSrcXE[`XLEN-1:0];
|
||||
assign IntSrcXE = XE[`XLEN-1:0];
|
||||
else
|
||||
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
|
||||
assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE};
|
||||
|
||||
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
|
||||
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
|
||||
@ -321,27 +327,24 @@ module fpu (
|
||||
|
||||
// E/M pipe registers
|
||||
|
||||
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
|
||||
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
|
||||
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
|
||||
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM);
|
||||
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
|
||||
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM});
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
|
||||
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
|
||||
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
|
||||
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
|
||||
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
|
||||
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);
|
||||
flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE},
|
||||
{AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM});
|
||||
{ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE},
|
||||
{ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM});
|
||||
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
|
||||
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
|
||||
{CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
|
||||
{CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
|
||||
|
||||
// BEGIN MEMORY STAGE
|
||||
|
||||
@ -357,11 +360,11 @@ module fpu (
|
||||
|
||||
assign FpLoadStoreM = FResSelM[1];
|
||||
|
||||
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
|
||||
.FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
|
||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
|
||||
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM),
|
||||
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
|
||||
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM),
|
||||
.FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
|
||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM),
|
||||
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
|
||||
.CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
|
||||
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
@ -371,9 +374,6 @@ module fpu (
|
||||
// M/W pipe registers
|
||||
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResSelM, FmtM},
|
||||
{FRegWriteW, FResSelW, FmtW});
|
||||
|
||||
// BEGIN WRITEBACK STAGE
|
||||
|
||||
|
@ -26,60 +26,59 @@
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fsgninj (
|
||||
input logic XSgnE, YSgnE, // X and Y sign bits
|
||||
input logic [`FLEN-1:0] FSrcXE, // X
|
||||
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
|
||||
input logic [1:0] SgnOpCodeE, // operation control
|
||||
output logic [`FLEN-1:0] SgnResE // result
|
||||
input logic Xs, Ys, // X and Y sign bits
|
||||
input logic [`FLEN-1:0] X, // X
|
||||
input logic [`FMTBITS-1:0] Fmt, // format
|
||||
input logic [1:0] OpCtrl, // operation control
|
||||
output logic [`FLEN-1:0] SgnRes // result
|
||||
);
|
||||
|
||||
logic ResSgn;
|
||||
|
||||
//op code designation:
|
||||
//
|
||||
//00 - fsgnj - directly copy over sign value of FSrcYE
|
||||
//01 - fsgnjn - negate sign value of FSrcYE
|
||||
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
|
||||
//
|
||||
// OpCtrl:
|
||||
// 00 - fsgnj - directly copy over sign value of Y
|
||||
// 01 - fsgnjn - negate sign value of Y
|
||||
// 10 - fsgnjx - XOR sign values of X and Y
|
||||
|
||||
// calculate the result's sign
|
||||
assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
|
||||
assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
|
||||
|
||||
// format final result based on precision
|
||||
// - uses NaN-blocking format
|
||||
// - if there are any unsused bits the most significant bits are filled with 1s
|
||||
|
||||
if (`FPSIZES == 1)
|
||||
assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
|
||||
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
|
||||
|
||||
else if (`FPSIZES == 2)
|
||||
assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]};
|
||||
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
|
||||
|
||||
else if (`FPSIZES == 3) begin
|
||||
logic [2:0] SgnBits;
|
||||
always_comb
|
||||
case (FmtE)
|
||||
`FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]};
|
||||
`FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]};
|
||||
case (Fmt)
|
||||
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
|
||||
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
|
||||
`FMT2: SgnBits = {2'b11, ResSgn};
|
||||
default: SgnBits = {3{1'bx}};
|
||||
endcase
|
||||
assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]};
|
||||
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
|
||||
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [3:0] SgnBits;
|
||||
always_comb
|
||||
case (FmtE)
|
||||
`Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
|
||||
`D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
|
||||
`S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]};
|
||||
case (Fmt)
|
||||
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
|
||||
`D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
|
||||
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
|
||||
`H_FMT: SgnBits = {3'b111, ResSgn};
|
||||
endcase
|
||||
assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]};
|
||||
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
@ -107,6 +107,6 @@ module otfc4 (
|
||||
QMNext = {QMR, 2'b11};
|
||||
end
|
||||
end
|
||||
// Final Quoteint is in the range [.5, 2)
|
||||
// Final Qmeint is in the range [.5, 2)
|
||||
|
||||
endmodule
|
||||
|
@ -36,7 +36,7 @@ module postprocess (
|
||||
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
|
||||
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrl, // choose which opperation (look below for values)
|
||||
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic XZero, YZero, ZZero, // inputs are zero
|
||||
input logic XInf, YInf, ZInf, // inputs are infinity
|
||||
input logic XNaN, YNaN, ZNaN, // inputs are NaN
|
||||
@ -54,7 +54,7 @@ module postprocess (
|
||||
input logic FmaNegSum, // was the sum negitive
|
||||
input logic FmaInvA, // do you invert Z
|
||||
input logic FmaSs,
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count
|
||||
//divide signals
|
||||
input logic [`DURLEN-1:0] DivEarlyTermShift,
|
||||
input logic DivS,
|
||||
@ -125,14 +125,14 @@ module postprocess (
|
||||
logic Sqrt;
|
||||
|
||||
// signals to help readability
|
||||
assign Signed = FOpCtrl[0];
|
||||
assign Int64 = FOpCtrl[1];
|
||||
assign IntToFp = FOpCtrl[2];
|
||||
assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0];
|
||||
assign Signed = OpCtrl[0];
|
||||
assign Int64 = OpCtrl[1];
|
||||
assign IntToFp = OpCtrl[2];
|
||||
assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
|
||||
assign CvtOp = (PostProcSel == 2'b00);
|
||||
assign FmaOp = (PostProcSel == 2'b10);
|
||||
assign DivOp = (PostProcSel == 2'b01)&DivDone;
|
||||
assign Sqrt = FOpCtrl[0];
|
||||
assign Sqrt = OpCtrl[0];
|
||||
|
||||
// is there an input of infinity or NaN being used
|
||||
assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
|
||||
@ -142,9 +142,9 @@ module postprocess (
|
||||
// - fp -> fp: OpCtrl contains the percision of the output
|
||||
// - otherwise: Fmt contains the percision of the output
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT);
|
||||
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0];
|
||||
assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
@ -152,7 +152,7 @@ module postprocess (
|
||||
|
||||
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
|
||||
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
|
||||
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
|
||||
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
|
@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits
|
||||
// for efficiency. You can probably optimize your logic to
|
||||
// select the proper divisor with less delay.
|
||||
|
||||
// Quotient equations from EE371 lecture notes 13-20
|
||||
// Qmient equations from EE371 lecture notes 13-20
|
||||
assign p = ps ^ pc;
|
||||
assign g = ps & pc;
|
||||
|
||||
|
@ -43,7 +43,7 @@ module shiftcorrection(
|
||||
output logic [`NE+1:0] FmaMe // exponent of the normalized sum
|
||||
);
|
||||
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
|
||||
logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
|
||||
logic ResDenorm; // is the result denormalized
|
||||
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
|
||||
|
||||
@ -53,11 +53,11 @@ module shiftcorrection(
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
|
||||
// if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
|
||||
assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
|
||||
assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
|
||||
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
|
||||
always_comb
|
||||
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
|
||||
else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted;
|
||||
else if (DivOp&~DivResDenorm) Mf = CorrQmShifted;
|
||||
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
// Determine sum's exponent
|
||||
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
|
||||
|
@ -37,15 +37,15 @@ module srt(
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic [`DIVLEN-1:0] X,
|
||||
input logic [`DIVLEN-1:0] Dpreproc,
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
input logic NegSticky,
|
||||
output logic [`QLEN-1-(`RADIX/4):0] Quot,
|
||||
input logic [`DIVLEN-1:0] X,
|
||||
input logic [`DIVLEN-1:0] Dpreproc,
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
input logic NegSticky,
|
||||
output logic [`QLEN-1-(`RADIX/4):0] Qm,
|
||||
output logic [`DIVLEN+3:0] NextWSN, NextWCN,
|
||||
output logic [`DIVLEN+3:0] StickyWSA,
|
||||
output logic [`DIVLEN+3:0] FirstWS, FirstWC,
|
||||
output logic [`NE+1:0] DivCalcExpM,
|
||||
output logic [`NE+1:0] QeM,
|
||||
output logic [`XLEN-1:0] Rem
|
||||
);
|
||||
|
||||
@ -62,7 +62,7 @@ module srt(
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
logic [`DIVLEN+3:0] WSN, WCN;
|
||||
logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
|
||||
logic [`NE+1:0] DivCalcExp;
|
||||
logic [`NE+1:0] Qe;
|
||||
logic [$clog2(`XLEN+1)-1:0] intExp;
|
||||
logic intSign;
|
||||
logic [`QLEN-1:0] QMMux;
|
||||
@ -88,7 +88,7 @@ module srt(
|
||||
mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
|
||||
flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
|
||||
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
|
||||
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
|
||||
flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
|
||||
|
||||
|
||||
// Divisor Selections
|
||||
@ -123,7 +123,7 @@ module srt(
|
||||
flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
|
||||
flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
|
||||
|
||||
assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
|
||||
assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
|
||||
assign FirstWS = WS[0];
|
||||
assign FirstWC = WC[0];
|
||||
if(`RADIX==2)
|
||||
@ -132,7 +132,7 @@ module srt(
|
||||
else
|
||||
assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
|
||||
|
||||
expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
|
||||
expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe);
|
||||
|
||||
endmodule
|
||||
|
||||
@ -155,7 +155,7 @@ module divinteration (
|
||||
logic [3:0] q;
|
||||
logic qp, qz;//, qn;
|
||||
|
||||
// Quotient Selection logic
|
||||
// Qmient Selection logic
|
||||
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
|
||||
// q encoding:
|
||||
// 1000 = +2
|
||||
@ -226,7 +226,7 @@ module expcalc(
|
||||
input logic [`NE-1:0] Xe, Ye,
|
||||
input logic XZeroE,
|
||||
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
|
||||
output logic [`NE+1:0] DivCalcExp
|
||||
output logic [`NE+1:0] Qe
|
||||
);
|
||||
logic [`NE-2:0] Bias;
|
||||
|
||||
@ -255,5 +255,5 @@ module expcalc(
|
||||
endcase
|
||||
end
|
||||
// correct exponent for denormalized input's normalization shifts
|
||||
assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
|
||||
assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
|
||||
endmodule
|
@ -43,7 +43,7 @@ module srtfsm(
|
||||
input logic [`DIVLEN+3:0] StickyWSA,
|
||||
input logic [`DURLEN-1:0] Dur,
|
||||
output logic [`DURLEN-1:0] EarlyTermShiftE,
|
||||
output logic DivStickyE,
|
||||
output logic DivSE,
|
||||
output logic DivDone,
|
||||
output logic NegSticky,
|
||||
output logic DivBusy
|
||||
@ -65,9 +65,9 @@ module srtfsm(
|
||||
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
|
||||
// radix-4 division can't create a QM that continually adds 0's
|
||||
if (`RADIX == 2)
|
||||
assign DivStickyE = |W&~(StickyWSA == WS);
|
||||
assign DivSE = |W&~(StickyWSA == WS);
|
||||
else
|
||||
assign DivStickyE = |W;
|
||||
assign DivSE = |W;
|
||||
assign DivDone = (state == DONE);
|
||||
assign W = WC+WS;
|
||||
assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
|
||||
|
@ -30,35 +30,34 @@
|
||||
|
||||
module unpack (
|
||||
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
|
||||
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
|
||||
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
|
||||
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
|
||||
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
|
||||
output logic XDenormE, ZDenormE, // is XYZ denormalized
|
||||
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
|
||||
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
|
||||
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
|
||||
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
output logic Xs, Ys, Zs, // sign bits of XYZ
|
||||
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
|
||||
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
|
||||
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
|
||||
output logic XDenorm, ZDenorm, // is XYZ denormalized
|
||||
output logic XZero, YZero, ZZero, // is XYZ zero
|
||||
output logic XInf, YInf, ZInf, // is XYZ infinity
|
||||
output logic XExpMax // does X have the maximum exponent (NaN or Inf)
|
||||
);
|
||||
|
||||
logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
|
||||
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
|
||||
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s
|
||||
logic YExpMax, ZExpMax; // is the exponent all 1s
|
||||
|
||||
unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE),
|
||||
.NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
|
||||
.Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
|
||||
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm),
|
||||
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
|
||||
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
|
||||
|
||||
unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE),
|
||||
.NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
|
||||
.Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
|
||||
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym),
|
||||
.NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
|
||||
.Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
|
||||
|
||||
unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE),
|
||||
.NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
|
||||
.Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
|
||||
unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm),
|
||||
.NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
|
||||
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
|
||||
// is the input denormalized
|
||||
assign XDenormE = ~XExpNonZero & ~XFracZero;
|
||||
assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
|
||||
assign XDenorm = ~XExpNonZero & ~XFracZero;
|
||||
assign ZDenorm = ~ZExpNonZero & ~ZFracZero;
|
||||
endmodule
|
@ -30,7 +30,7 @@
|
||||
|
||||
module unpackinput (
|
||||
input logic [`FLEN-1:0] In, // inputs from register file
|
||||
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
|
||||
output logic Sgn, // sign bits of XYZ
|
||||
output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision)
|
||||
@ -74,16 +74,16 @@ module unpackinput (
|
||||
// quad and half
|
||||
// double and half
|
||||
|
||||
assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
|
||||
assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
|
||||
|
||||
// choose sign bit depending on format - 1=larger precsion 0=smaller precision
|
||||
assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];
|
||||
assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
|
||||
|
||||
// extract the fraction, add trailing zeroes to the mantissa if nessisary
|
||||
assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
|
||||
// is the exponent non-zero
|
||||
assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
|
||||
assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
|
||||
|
||||
// example double to single conversion:
|
||||
// 1023 = 0011 1111 1111
|
||||
@ -95,10 +95,10 @@ module unpackinput (
|
||||
|
||||
// extract the exponent, converting the smaller exponent into the larger precision if nessisary
|
||||
// - if the original precision had a denormal number convert the exponent value 1
|
||||
assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
|
||||
assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
|
||||
|
||||
// is the exponent all 1's
|
||||
assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
|
||||
assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
|
||||
|
||||
|
||||
end else if (`FPSIZES == 3) begin // three floating point precsions supported
|
||||
@ -122,7 +122,7 @@ module unpackinput (
|
||||
|
||||
// Check NaN boxing
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
`FMT: BadNaNBox = 0;
|
||||
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
|
||||
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
|
||||
@ -131,7 +131,7 @@ module unpackinput (
|
||||
|
||||
// extract the sign bit
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
`FMT: Sgn = In[`FLEN-1];
|
||||
`FMT1: Sgn = In[`LEN1-1];
|
||||
`FMT2: Sgn = In[`LEN2-1];
|
||||
@ -140,7 +140,7 @@ module unpackinput (
|
||||
|
||||
// extract the fraction
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
`FMT: Frac = In[`NF-1:0];
|
||||
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
|
||||
@ -149,7 +149,7 @@ module unpackinput (
|
||||
|
||||
// is the exponent non-zero
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
|
||||
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
|
||||
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
|
||||
@ -166,7 +166,7 @@ module unpackinput (
|
||||
|
||||
// convert the larger precision's exponent to use the largest precision's bias
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
|
||||
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
|
||||
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
|
||||
@ -175,7 +175,7 @@ module unpackinput (
|
||||
|
||||
// is the exponent all 1's
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
`FMT: ExpMax = &In[`FLEN-2:`NF];
|
||||
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
|
||||
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
|
||||
@ -194,7 +194,7 @@ module unpackinput (
|
||||
|
||||
// Check NaN boxing
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
2'b11: BadNaNBox = 0;
|
||||
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
|
||||
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
|
||||
@ -203,7 +203,7 @@ module unpackinput (
|
||||
|
||||
// extract sign bit
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
2'b11: Sgn = In[`Q_LEN-1];
|
||||
2'b01: Sgn = In[`D_LEN-1];
|
||||
2'b00: Sgn = In[`S_LEN-1];
|
||||
@ -213,7 +213,7 @@ module unpackinput (
|
||||
|
||||
// extract the fraction
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
2'b11: Frac = In[`Q_NF-1:0];
|
||||
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
|
||||
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
|
||||
@ -222,7 +222,7 @@ module unpackinput (
|
||||
|
||||
// is the exponent non-zero
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
|
||||
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
|
||||
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
|
||||
@ -240,7 +240,7 @@ module unpackinput (
|
||||
|
||||
// convert the double precsion exponent into quad precsion
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
|
||||
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
|
||||
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
|
||||
@ -250,7 +250,7 @@ module unpackinput (
|
||||
|
||||
// is the exponent all 1's
|
||||
always_comb
|
||||
case (FmtE)
|
||||
case (Fmt)
|
||||
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
|
||||
2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
|
||||
2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
|
||||
|
Loading…
Reference in New Issue
Block a user