forked from Github_Repos/cvw
moved ctrl signal registers into fctrl, also a lot of code cleaning
This commit is contained in:
@ -34,20 +34,20 @@ module divsqrt(
input logic clk,
input logic reset,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NF:0] XManE, YManE,
input logic [`NE-1:0] XExpE, YExpE,
input logic [`NF:0] XmE, YmE,
input logic [`NE-1:0] XeE, YeE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStartE,
input logic StallM,
input logic StallE,
output logic DivStickyM,
input logic StallE,
output logic DivSM,
output logic DivBusy,
output logic DivDone,
output logic [`NE+1:0] DivCalcExpM,
output logic [`NE+1:0] QeM,
output logic [`DURLEN-1:0] EarlyTermShiftM,
output logic [`QLEN-1-(`RADIX/4):0] QuotM
output logic [`QLEN-1-(`RADIX/4):0] QmM
// output logic [`XLEN-1:0] RemM,
@ -60,10 +60,10 @@ module divsqrt(
logic [`DURLEN-1:0] Dur;
logic NegSticky;
srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
.StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
.StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
.StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM);
@ -29,29 +29,29 @@
`include "wally-config.vh"
module fclassify (
input logic XSgnE, // sign bit
input logic XNaNE, // is NaN
input logic XSNaNE, // is signaling NaN
input logic XDenormE, // is denormal
input logic XZeroE, // is zero
input logic XInfE, // is infinity
output logic [`XLEN-1:0] ClassResE // classify result
input logic Xs, // sign bit
input logic XNaN, // is NaN
input logic XSNaN, // is signaling NaN
input logic XDenorm,// is denormal
input logic XZero, // is zero
input logic XInf, // is infinity
output logic [`XLEN-1:0] ClassRes// classify result
logic PInf, PZero, PNorm, PDenorm;
logic NInf, NZero, NNorm, NDenorm;
logic XNormE;
logic XNorm;
// determine the sub categories
assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE);
assign PInf = ~XSgnE&XInfE;
assign NInf = XSgnE&XInfE;
assign PNorm = ~XSgnE&XNormE;
assign NNorm = XSgnE&XNormE;
assign PDenorm = ~XSgnE&XDenormE;
assign NDenorm = XSgnE&XDenormE;
assign PZero = ~XSgnE&XZeroE;
assign NZero = XSgnE&XZeroE;
assign XNorm= ~(XNaN | XInf| XDenorm| XZero);
assign PInf = ~Xs&XInf;
assign NInf = Xs&XInf;
assign PNorm = ~Xs&XNorm;
assign NNorm = Xs&XNorm;
assign PDenorm = ~Xs&XDenorm;
assign NDenorm = Xs&XDenorm;
assign PZero = ~Xs&XZero;
assign NZero = Xs&XZero;
// determine sub category and combine into the result
// bit 0 - -Inf
@ -64,6 +64,6 @@ module fclassify (
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
@ -27,9 +27,10 @@
`include "wally-config.vh"
// FOpCtrlE values
// OpCtrl values
// 110 min
// 101 max
// 010 equal
@ -37,36 +38,32 @@
// 011 less than or equal
module fcmp (
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // see above table
input logic XSgnE, YSgnE, // input signs
input logic [`NE-1:0] XExpE, YExpE, // input exponents
input logic [`NF:0] XManE, YManE, // input mantissa
input logic XZeroE, YZeroE, // is zero
input logic XNaNE, YNaNE, // is NaN
input logic XSNaNE, YSNaNE, // is signaling NaN
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
output logic CmpNVE, // invalid flag
output logic [`FLEN-1:0] CmpFpResE, // compare resilt
output logic [`XLEN-1:0] CmpIntResE // compare resilt
input logic [`FMTBITS-1:0] Fmt, // format of fp number
input logic [2:0] OpCtrl, // see above table
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Xe, Ye, // input exponents
input logic [`NF:0] Xm, Ym, // input mantissa
input logic XZero, YZero, // is zero
input logic XNaN, YNaN, // is NaN
input logic XSNaN, YSNaN, // is signaling NaN
input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
output logic CmpNV, // invalid flag
output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [`XLEN-1:0] CmpIntRes // compare integer result
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes;
logic BothZero, EitherNaN, EitherSNaN;
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes; // NaN result
logic BothZero; // are both inputs zero
logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
// assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression
assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
assign EQ = (X == Y);
//assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]});
//assign LT = XInt < YInt;
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
assign EQ = (FSrcXE == FSrcYE);
assign BothZero = XZeroE&YZeroE;
assign EitherNaN = XNaNE|YNaNE;
assign EitherSNaN = XSNaNE|YSNaNE;
assign BothZero = XZero&YZero;
assign EitherNaN = XNaN|YNaN;
assign EitherSNaN = XSNaN|YSNaN;
// flags
@ -74,78 +71,91 @@ module fcmp (
// LT/LE - signaling - sets invalid if NaN input
// EQ - quiet - sets invalid if signaling NaN input
always_comb begin
case (FOpCtrlE[2:0])
3'b110: CmpNVE = EitherSNaN;//min
3'b101: CmpNVE = EitherSNaN;//max
3'b010: CmpNVE = EitherSNaN;//equal
3'b001: CmpNVE = EitherNaN;//less than
3'b011: CmpNVE = EitherNaN;//less than or equal
default: CmpNVE = 1'bx;
case (OpCtrl[2:0])
3'b110: CmpNV = EitherSNaN;//min
3'b101: CmpNV = EitherSNaN;//max
3'b010: CmpNV = EitherSNaN;//equal
3'b001: CmpNV = EitherNaN;//less than
3'b011: CmpNV = EitherNaN;//less than or equal
default: CmpNV = 1'bx;
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
// fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
// for IEEE, return the payload of X
// for RISC-V, return the canonical NaN
// select the NaN result
if (`FPSIZES == 1)
if(`IEEE754) assign NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
else if (`FPSIZES == 2)
if(`IEEE754) assign NaNRes = FmtE ? {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
else assign NaNRes = FmtE ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
else if (`FPSIZES == 3)
case (FmtE)
case (Fmt)
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = {`FLEN{1'bx}};
else if (`FPSIZES == 4)
case (FmtE)
case (Fmt)
if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, XSgnE, {`D_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`D_NF]};
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, XSgnE, {`S_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`S_NF]};
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, XSgnE, {`H_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`H_NF]};
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
// when one input is a NaN -output the non-NaN
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
if(OpCtrl[0]) // MAX
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = Y; // X < Y
else CmpFpRes = X; // X > Y
else // MIN
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = X; // X < Y
else CmpFpRes = Y; // X > Y
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
@ -29,25 +29,41 @@
`include "wally-config.vh"
// FPU controller port list: clock/reset, pipeline stall/flush controls, the
// decode-stage instruction and its fields, and the control outputs.
module fctrl (
input logic clk,
input logic reset,
input logic StallE, StallM, StallW, // stall signals
input logic FlushE, FlushM, FlushW, // flush signals
input logic [31:0] InstrD, // decode-stage instruction; bits 19:15, 24:20 and 31:27 are registered as Adr1E/Adr2E/Adr3E
input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain precision
input logic [6:0] OpD, // bits 6:0 of instruction
input logic [4:0] Rs2D, // bits 24:20 of instruction
input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode
input logic [2:0] FRM_REGW, // rounding mode from CSR
input logic [1:0] STATUS_FS, // is FPU enabled?
input logic FDivBusyE, // is the divider busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FRegWriteD, // FP register write enable
output logic FDivStartD, // Start division or squareroot
output logic [1:0] FResSelD, // select result to be written to fp register
output logic [2:0] FOpCtrlD, // chooses which operation to do - specifics shown at bottom of module and in each unit
output logic [1:0] PostProcSelD, // select result in the post processing unit
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
output logic [2:0] FrmD, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
output logic FWriteIntD // is the result written to the integer register
output logic FRegWriteM, FRegWriteW, // FP register write enable
output logic [2:0] FrmM, // FP rounding mode
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
output logic DivStartE, // Start division or squareroot
output logic FWriteIntE, FWriteIntM, // Write to integer register
output logic [2:0] OpCtrlE, OpCtrlM, // Select which operation to do in each component
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
output logic [4:0] Adr1E, Adr2E, Adr3E // addresses of each input
// Width of the packed decode-stage control word. It must equal the sum of the
// fields unpacked from ControlsD: 1+1+2+2+3+1+1 = 11.
`define FCTRLW 11
logic [`FCTRLW-1:0] ControlsD; // packed control word, split into individual signals by the "unswizzle" assign
logic FRegWriteD; // FP register write enable
logic DivStartD; // Start division or squareroot
logic FWriteIntD; // integer register write enable
logic FRegWriteE; // FP register write enable
logic [2:0] OpCtrlD; // Select which operation to do in each component
logic [1:0] PostProcSelD; // select result in the post processing unit
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
logic [2:0] FrmD, FrmE; // FP rounding mode
logic [`FMTBITS-1:0] FmtD; // FP format
//*** will putting x for don't cares reduce area in synthesis???
// FPU Instruction Decoder
@ -130,7 +146,7 @@ module fctrl (
// unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD;
// rounding modes:
// 000 - round to nearest, ties to even
@ -168,7 +184,7 @@ module fctrl (
// 10 fma
// Other Sel:
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
// Ctrl signal = {OpCtrl[2], &OpCtrl[1:0]}
// 000 - sign 00
// 001 - negate sign 00
// 010 - xor sign 00
@ -205,5 +221,20 @@ module fctrl (
// 01 - negate sign
// 10 - xor sign
// D/E pipeline register
// Control bundle width: 1 (FRegWrite) + 2 (PostProcSel) + 2 (FResSel) + 3 (Frm)
// + FMTBITS (Fmt) + 3 (OpCtrl) + 1 (FWriteInt) = 12+FMTBITS.
// NOTE(review): flopenrc is defined elsewhere; the positional arguments look
// like (clk, reset, clear, enable, d, q) - confirm against its definition.
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
// Source register addresses: three 5-bit instruction fields (19:15, 24:20, 31:27) -> 15 bits
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
// The divide-start bit uses a wider enable than the other D/E registers: it also
// loads while FDivBusyE is high. (~ binds tighter than |, so this is (~StallE)|FDivBusyE.)
// NOTE(review): presumably so DivStartE does not stay asserted while the divider
// stalls the execute stage - confirm.
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
// E/M pipeline register
// Same 12+FMTBITS bundle as D/E. The field order differs from DECtrlReg3 (FResSel
// precedes PostProcSel), but the d and q concatenations match each other.
// NOTE(review): the width here is written 12+int'(`FMTBITS) while DECtrlReg3 uses
// 12+`FMTBITS; the two should probably be written the same way.
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
// M/W pipeline register
// 1 (FRegWrite) + 2 (FResSel) = 3 bits
flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM},
{FRegWriteW, FResSelW});
@ -35,7 +35,7 @@ module fcvt (
input logic [`NE-1:0] Xe, // input's exponent
input logic [`NF:0] Xm, // input's fraction
input logic [`XLEN-1:0] Int, // integer input - from IEU
input logic [2:0] FOpCtrl, // choose which opperation (look below for values)
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic XZero, // is the input zero
input logic XDenorm, // is the input denormalized
@ -73,17 +73,17 @@ module fcvt (
// separate OpCtrl into named bits for code readability
assign Signed = FOpCtrl[0];
assign Int64 = FOpCtrl[1];
assign IntToFp = FOpCtrl[2];
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
// choose the output format depending on the operation
// - fp -> fp: OpCtrl contains the precision of the output
// - int -> fp: Fmt contains the precision of the output
// With two supported sizes OutFmt is the 1-bit result of comparing OpCtrl[1:0]
// against `FMT; with three or four sizes OpCtrl[1:0] is used directly.
if (`FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT);
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0];
assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
@ -31,20 +31,20 @@
`include "wally-config.vh"
module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected
output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value
always_comb begin
// set defaults
FForwardXE = 2'b00; // choose FRD1E
FForwardYE = 2'b00; // choose FRD2E
FForwardZE = 2'b00; // choose FRD3E
ForwardXE = 2'b00; // choose FRD1E
ForwardYE = 2'b00; // choose FRD2E
ForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
@ -52,28 +52,28 @@ module fhazard(
// if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
// if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
@ -34,7 +34,7 @@ module fma(
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad
output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic ZmSticky, // sticky bit that is calculated during alignment
@ -46,7 +46,7 @@ module fma(
output logic Ps, // the product's sign
output logic Ss, // the sum's sign
output logic [`NE+1:0] Se,
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count
output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
@ -72,7 +72,7 @@ module fma(
// Alignment shifter
// calculate the signs and take the opperation into account
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
@ -85,7 +85,7 @@ module fma(
add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt);
@ -120,7 +120,7 @@ endmodule
module sign(
input logic [2:0] FOpCtrl, // opperation contol
input logic [2:0] OpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes opperation into account
output logic As // aligned addend sign used in fma - takes opperation into account
@ -130,9 +130,9 @@ module sign(
// Negate product's sign if FNMADD or FNMSUB
// flip if the operation negates the product: OpCtrl[1] set and OpCtrl[2] clear (codes 010 and 011)
assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
// flip the addend's sign if Z is subtracted: OpCtrl[0] set (codes 001 and 011)
assign As = Zs^FOpCtrl[0];
assign As = Zs^OpCtrl[0];
@ -275,7 +275,7 @@ endmodule
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+3:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result
output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result
logic [3*`NF+6:0] T;
@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt));
@ -32,7 +32,7 @@ module fmashiftcalc(
input logic [3*`NF+5:0] FmaSm, // the positive sum
input logic [`NE-1:0] Ze, // exponent of Z
input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic FmaKillProd, // is the product set to zero
input logic [`NE+1:0] FmaSe,
@ -52,7 +52,7 @@ module fmashiftcalc(
// Determine if the sum is zero
assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin
@ -152,7 +152,7 @@ module fmashiftcalc(
// - shift once if killing a product and the result is denormalized
assign FmaShiftIn = {3'b0, FmaSm};
if (`FPSIZES == 1)
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1;
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1;
assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1;
@ -30,28 +30,28 @@
`include "wally-config.vh"
module fpu (
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU
input logic [`FLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled?
output logic FRegWriteM, // FP register write enable
output logic FpLoadStoreM, // Fp load instruction?
output logic FStore2,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
output logic [1:0] FResSelW,
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
input logic [31:0] InstrD, // instruction (from IFU)
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU)
input logic StallE, StallM, StallW, // stall signals (from HZU)
input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
output logic FRegWriteM, // FP register write enable (to privileged unit)
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
output logic FStore2, // store two words into memory (to LSU)
output logic FStallD, // Stall the decode stage (To HZU)
output logic FWriteIntE, // integer register write enable (to IEU)
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic [`XLEN-1:0] FCvtIntResW, // convert result to be written to integer register (to IEU)
output logic [1:0] FResSelW, // final result selection (to IEU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
@ -62,99 +62,88 @@ module fpu (
// - sets the underflow after rounding
// control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic FWriteIntM; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
logic FRegWriteW; // FP register write enable
logic [2:0] FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic DivStartE; // Start division or squareroot
logic FWriteIntM; // Write to integer register
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic FmtQ;
logic FOpCtrlQ;
logic XsE, YsE, ZsE; // input's sign - execute stage
logic XsM, YsM; // input's sign - memory stage
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [`NE-1:0] ZeM; // input's exponent - memory stage
logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage
logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
// Fma Signals
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic [`NE+1:0] SeE,SeM;
logic KillProdE, KillProdM;
logic InvAE, InvAM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic SsE, SsM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
logic [3*`NF+5:0] SmE, SmM;
logic [`NE+1:0] PeE, PeM;
logic ZmStickyE, ZmStickyM;
logic [`NE+1:0] SeE,SeM;
logic KillProdE, KillProdM;
logic InvAE, InvAM;
logic NegSumE, NegSumM;
logic AsE, AsM;
logic PsE, PsM;
logic SsE, SsM;
logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM;
// Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic [`NE:0] CeE, CeM; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign
logic CsE, CsM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals
logic [`QLEN-1-(`RADIX/4):0] QuotM;
logic [`NE+1:0] DivCalcExpE, DivCalcExpM;
logic DivStickyE, DivStickyM;
logic DivDoneM;
logic [`DURLEN-1:0] EarlyTermShiftM;
logic [`QLEN-1-(`RADIX/4):0] QmM;
logic [`NE+1:0] QeE, QeM;
logic DivSE, DivSM;
logic DivDoneM;
logic [`DURLEN-1:0] EarlyTermShiftM;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
logic [`XLEN-1:0] ClassResE; // classify result
logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] ClassResE; // classify result
logic [`XLEN-1:0] FIntResE; // integer result chosen by IntResMux from ClassResE, IntSrcXE and CmpIntResE
logic [`FLEN-1:0] FpResM, FpResW; // floating-point result in the memory stage and its registered copy in the writeback stage
logic [`FLEN-1:0] PostProcResM; // result output of the postprocess unit
logic [4:0] PostProcFlgM; // flag output of the postprocess unit
logic [`XLEN-1:0] FCvtIntResM;
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic load_preload; // enable for FF on fpdivsqrt
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // alternate value muxed in for Y by fyaddmux (presumably 1.0 with NaN boxing if needed - TODO confirm where it is assigned)
@ -171,9 +160,11 @@ module fpu (
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
.FmtD, .FrmD, .FWriteIntD);
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
.DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -185,12 +176,6 @@ module fpu (
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
@ -207,12 +192,12 @@ module fpu (
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
.FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE);
@ -227,7 +212,7 @@ module fpu (
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Replace Y with BoxedOneE when OpCtrlE[2:1]==2'b11, FResSelE==2'b01 and PostProcSelE==2'b10 (presumably fadd/fsub so the fma multiplies X by one - TODO confirm)
// Force Z to be 0 for multiply instructions
@ -241,55 +226,76 @@ module fpu (
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE);
// unpack unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
// NaN flags for X/Y/Z and the signaling-NaN flag for X are connected explicitly:
// XNaNE, YNaNE, ZNaNE and XSNaNE drive divsqrt, fcmp, fclassify and the E/M pipeline register
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
    .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
    .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE),
    .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE),
    .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
    .ZInf(ZInfE), .XExpMax(XExpMaxE));
// fma - does multiply, add, and multiply-add instructions
fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE),
.Xe(XExpE), .Ye(YExpE), .Ze(ZExpE),
.Xm(XManE), .Ym(YManE), .Zm(ZManE),
// fused multiply add
// - fadd/fsub
// - fmul
// - fmadd/fnmadd/fmsub/fnmsub
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE),
.Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
.FOpCtrl(FOpCtrlE), .Fmt(FmtE),
.As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE),
.Sm(SumE), .Pe(ProdExpE),
.NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE),
.ZmSticky(AddendStickyE), .KillProd(KillProdE));
.OpCtrl(OpCtrlE), .Fmt(FmtE),
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
.Sm(SmE), .Pe(PeE),
.NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE),
.ZmSticky(ZmStickyE), .KillProd(KillProdE));
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
.StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
.EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
// divide and squareroot
// - fdiv
// - fsqrt
// *** add other operations
divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE),
.StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
.EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
// compare
// - fmin/fmax
// - flt/fle/feq
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// sign injection
// - fsgnj/fsgnjx/fsgnjn
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE),
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE),
// classify
// - fclass
fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// convert
// - fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE),
.ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE),
// data to be stored in memory - to IEU
// - FP uses NaN-boxing format
// - if there are any unused bits, the most significant bits are filled with 1s
if (`LLEN==`XLEN) begin
assign FWriteDataE = FSrcYE[`XLEN-1:0];
assign FWriteDataE = YE[`XLEN-1:0];
end else begin
logic [`FLEN-1:0] FWriteDataE;
if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
else assign FStore2 = FmtM;
if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
if (`FPSIZES==1) assign FWriteDataE = YE;
else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
@ -306,14 +312,14 @@ module fpu (
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
// select a result that may be written to the FP register
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
if (`FLEN>`XLEN)
assign IntSrcXE = FSrcXE[`XLEN-1:0];
assign IntSrcXE = XE[`XLEN-1:0];
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
// integer register is at least as wide as the FP register: sign-extend X
// - replicate only the sign bit XE[`FLEN-1]; replicating the whole vector would
//   fill the upper bits with a copy of XE instead of the sign
assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1]}}, XE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
@ -321,27 +327,24 @@ module fpu (
// E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM});
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);
flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE},
{AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM});
{ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE},
{ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
{CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
{CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
@ -357,11 +360,11 @@ module fpu (
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
.FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM),
.CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM),
.FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM),
.FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged
@ -371,9 +374,6 @@ module fpu (
// M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM, FmtM},
{FRegWriteW, FResSelW, FmtW});
@ -26,60 +26,59 @@
`include "wally-config.vh"
module fsgninj (
input logic XSgnE, YSgnE, // X and Y sign bits
input logic [`FLEN-1:0] FSrcXE, // X
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic [1:0] SgnOpCodeE, // operation control
output logic [`FLEN-1:0] SgnResE // result
input logic Xs, Ys, // X and Y sign bits
input logic [`FLEN-1:0] X, // X
input logic [`FMTBITS-1:0] Fmt, // format
input logic [1:0] OpCtrl, // operation control
output logic [`FLEN-1:0] SgnRes // result
logic ResSgn;
//op code designation:
//00 - fsgnj - directly copy over sign value of FSrcYE
//01 - fsgnjn - negate sign value of FSrcYE
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
// OpCtrl:
// 00 - fsgnj - directly copy over sign value of Y
// 01 - fsgnjn - negate sign value of Y
// 10 - fsgnjx - XOR sign values of X and Y
// calculate the result's sign
assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
// format final result based on precision
// - uses NaN-boxing format
// - if there are any unused bits, the most significant bits are filled with 1s
if (`FPSIZES == 1)
assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
else if (`FPSIZES == 2)
assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]};
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
else if (`FPSIZES == 3) begin
logic [2:0] SgnBits;
case (FmtE)
`FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]};
case (Fmt)
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
`FMT2: SgnBits = {2'b11, ResSgn};
default: SgnBits = {3{1'bx}};
assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]};
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
end else if (`FPSIZES == 4) begin
logic [3:0] SgnBits;
case (FmtE)
`Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]};
case (Fmt)
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
`H_FMT: SgnBits = {3'b111, ResSgn};
assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]};
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
@ -107,6 +107,6 @@ module otfc4 (
QMNext = {QMR, 2'b11};
// Final Quoteint is in the range [.5, 2)
// Final Quotient is in the range [.5, 2)
@ -36,7 +36,7 @@ module postprocess (
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] FOpCtrl, // choose which opperation (look below for values)
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, ZZero, // inputs are zero
input logic XInf, YInf, ZInf, // inputs are infinity
input logic XNaN, YNaN, ZNaN, // inputs are NaN
@ -54,7 +54,7 @@ module postprocess (
input logic FmaNegSum, // was the sum negitive
input logic FmaInvA, // do you invert Z
input logic FmaSs,
input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count
input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic [`DURLEN-1:0] DivEarlyTermShift,
input logic DivS,
@ -125,14 +125,14 @@ module postprocess (
logic Sqrt;
// signals to help readability
assign Signed = FOpCtrl[0];
assign Int64 = FOpCtrl[1];
assign IntToFp = FOpCtrl[2];
assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0];
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
assign CvtOp = (PostProcSel == 2'b00);
assign FmaOp = (PostProcSel == 2'b10);
assign DivOp = (PostProcSel == 2'b01)&DivDone;
assign Sqrt = FOpCtrl[0];
assign Sqrt = OpCtrl[0];
// is there an input of infinity or NaN being used
assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
@ -142,9 +142,9 @@ module postprocess (
// - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: Fmt contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT);
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0];
assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
// Normalization
@ -152,7 +152,7 @@ module postprocess (
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
@ -43,7 +43,7 @@ module shiftcorrection(
output logic [`NE+1:0] FmaMe // exponent of the normalized sum
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
@ -53,11 +53,11 @@ module shiftcorrection(
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
// if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Denorm)
assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
// if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted;
else if (DivOp&~DivResDenorm) Mf = CorrQmShifted;
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
@ -37,15 +37,15 @@ module srt(
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] Xe, Ye,
input logic XZeroE, YZeroE,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
input logic NegSticky,
output logic [`QLEN-1-(`RADIX/4):0] Quot,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
input logic NegSticky,
output logic [`QLEN-1-(`RADIX/4):0] Qm,
output logic [`DIVLEN+3:0] NextWSN, NextWCN,
output logic [`DIVLEN+3:0] StickyWSA,
output logic [`DIVLEN+3:0] FirstWS, FirstWC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`NE+1:0] QeM,
output logic [`XLEN-1:0] Rem
@ -62,7 +62,7 @@ module srt(
/* verilator lint_on UNOPTFLAT */
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2;
logic [`NE+1:0] DivCalcExp;
logic [`NE+1:0] Qe;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
logic [`QLEN-1:0] QMMux;
@ -88,7 +88,7 @@ module srt(
mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
// Divisor Selections
@ -123,7 +123,7 @@ module srt(
flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
assign FirstWS = WS[0];
assign FirstWC = WC[0];
@ -132,7 +132,7 @@ module srt(
assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe);
@ -155,7 +155,7 @@ module divinteration (
logic [3:0] q;
logic qp, qz;//, qn;
// Quotient Selection logic
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// q encoding:
// 1000 = +2
@ -226,7 +226,7 @@ module expcalc(
input logic [`NE-1:0] Xe, Ye,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
output logic [`NE+1:0] Qe
logic [`NE-2:0] Bias;
@ -255,5 +255,5 @@ module expcalc(
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
@ -43,7 +43,7 @@ module srtfsm(
input logic [`DIVLEN+3:0] StickyWSA,
input logic [`DURLEN-1:0] Dur,
output logic [`DURLEN-1:0] EarlyTermShiftE,
output logic DivStickyE,
output logic DivSE,
output logic DivDone,
output logic NegSticky,
output logic DivBusy
@ -65,9 +65,9 @@ module srtfsm(
// this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
// radix-4 division can't create a QM that continually adds 0's
if (`RADIX == 2)
assign DivStickyE = |W&~(StickyWSA == WS);
assign DivSE = |W&~(StickyWSA == WS);
assign DivStickyE = |W;
assign DivSE = |W;
assign DivDone = (state == DONE);
assign W = WC+WS;
assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
@ -30,35 +30,34 @@
module unpack (
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XDenorm, ZDenorm, // is XYZ denormalized
output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax // does X have the maximum exponent (NaN or Inf)
logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s
logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE),
.NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
.Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm),
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE),
.NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
.Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym),
.NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
.Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE),
.NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
.Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm),
.NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
// is the input denormalized
assign XDenormE = ~XExpNonZero & ~XFracZero;
assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
assign XDenorm = ~XExpNonZero & ~XFracZero;
assign ZDenorm = ~ZExpNonZero & ~ZFracZero;
@ -30,7 +30,7 @@
module unpackinput (
input logic [`FLEN-1:0] In, // inputs from register file
input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Sgn, // sign bits of XYZ
output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision)
@ -74,16 +74,16 @@ module unpackinput (
// quad and half
// double and half
assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
// choose sign bit depending on format - 1=larger precsion 0=smaller precision
assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];
assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
// extract the fraction, add trailing zeroes to the mantissa if nessisary
assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
// is the exponent non-zero
assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
// example double to single conversion:
// 1023 = 0011 1111 1111
@ -95,10 +95,10 @@ module unpackinput (
// extract the exponent, converting the smaller exponent into the larger precision if nessisary
// - if the original precision had a denormal number convert the exponent value 1
assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
// is the exponent all 1's
assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
end else if (`FPSIZES == 3) begin // three floating point precsions supported
@ -122,7 +122,7 @@ module unpackinput (
// Check NaN boxing
case (FmtE)
case (Fmt)
`FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
@ -131,7 +131,7 @@ module unpackinput (
// extract the sign bit
case (FmtE)
case (Fmt)
`FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1];
@ -140,7 +140,7 @@ module unpackinput (
// extract the fraction
case (FmtE)
case (Fmt)
`FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
@ -149,7 +149,7 @@ module unpackinput (
// is the exponent non-zero
case (FmtE)
case (Fmt)
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
@ -166,7 +166,7 @@ module unpackinput (
// convert the larger precision's exponent to use the largest precision's bias
case (FmtE)
case (Fmt)
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
@ -175,7 +175,7 @@ module unpackinput (
// is the exponent all 1's
case (FmtE)
case (Fmt)
`FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
@ -194,7 +194,7 @@ module unpackinput (
// Check NaN boxing
case (FmtE)
case (Fmt)
2'b11: BadNaNBox = 0;
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
@ -203,7 +203,7 @@ module unpackinput (
// extract sign bit
case (FmtE)
case (Fmt)
2'b11: Sgn = In[`Q_LEN-1];
2'b01: Sgn = In[`D_LEN-1];
2'b00: Sgn = In[`S_LEN-1];
@ -213,7 +213,7 @@ module unpackinput (
// extract the fraction
case (FmtE)
case (Fmt)
2'b11: Frac = In[`Q_NF-1:0];
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@ -222,7 +222,7 @@ module unpackinput (
// is the exponent non-zero
case (FmtE)
case (Fmt)
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
@ -240,7 +240,7 @@ module unpackinput (
// convert the double precision exponent into quad precision
case (FmtE)
case (Fmt)
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
@ -250,7 +250,7 @@ module unpackinput (
// is the exponent all 1's
case (FmtE)
case (Fmt)
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
Reference in New Issue
Block a user