fpu cleanup

This commit is contained in:
David Harris 2023-01-11 12:18:06 -08:00
parent 19f0eb2aa1
commit 68347d3558
2 changed files with 90 additions and 143 deletions

View File

@ -50,6 +50,7 @@ module fctrl (
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
output logic FpLoadStoreM, // FP load or store instruction
output logic FCvtIntW, output logic FCvtIntW,
output logic [4:0] Adr1D, Adr2D, Adr3D, // adresses of each input output logic [4:0] Adr1D, Adr2D, Adr3D, // adresses of each input
output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input
@ -275,12 +276,13 @@ module fctrl (
if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE; if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE;
else assign IDivStartE = 0; else assign IDivStartE = 0;
//assign FCvtIntE = (FResSelE == 2'b01);
// E/M pipleine register // E/M pipleine register
flopenrc #(14+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, flopenrc #(14+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE, FCvtIntE}, {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, IllegalFPUInstrE, FCvtIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, IllegalFPUInstrM, FCvtIntM}); {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, IllegalFPUInstrM, FCvtIntM});
assign FpLoadStoreM = FResSelM[1];
// M/W pipleine register // M/W pipleine register
flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM, FCvtIntM}, {FRegWriteM, FResSelM, FCvtIntM},

View File

@ -61,7 +61,7 @@ module fpu (
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
output logic FCvtIntW, // select FCvtIntRes (to IEU) output logic FCvtIntW, // select FCvtIntRes (to IEU)
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU) output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
); );
// RISC-V FPU specifics: // RISC-V FPU specifics:
// - multiprecision support uses NAN-boxing, putting 1's in unused msbs // - multiprecision support uses NAN-boxing, putting 1's in unused msbs
@ -110,6 +110,8 @@ module fpu (
logic XExpMaxE; // is the exponent all ones (max value) logic XExpMaxE; // is the exponent all ones (max value)
// Fma Signals // Fma Signals
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
logic [3*`NF+3:0] SmE, SmM; // Sum significand logic [3*`NF+3:0] SmE, SmM; // Sum significand
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
logic [`NE+1:0] SeE,SeM; // Sum exponent logic [`NE+1:0] SeE,SeM; // Sum exponent
@ -137,33 +139,27 @@ module fpu (
// result and flag signals // result and flag signals
logic [`XLEN-1:0] ClassResE; // classify result logic [`XLEN-1:0] ClassResE; // classify result
logic [`XLEN-1:0] FIntResE; // classify result logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
logic [`FLEN-1:0] FpResM, FpResW; // classify result logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/gt/geq)
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid) logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
logic [4:0] PostProcFlgM; // Postprocessor flags
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
// other signals
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed
logic StallUnpackedM;
// DECODE STAGE // other signals
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// ||||||||||| // Decode Stage: fctrl decoder, read register file
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// |||||||||||
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals // calculate FP control signals
@ -171,8 +167,10 @@ module fpu (
.Funct3E, .IntDivE, .InstrD, .Funct3E, .IntDivE, .InstrD,
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
.FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, .Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E); .IllegalFPUInstrM, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW,
.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E);
// FP register file // FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW), fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -185,21 +183,13 @@ module fpu (
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
// EXECUTION STAGE
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// |||||||||||| // Execute Stage: hazards, forwarding, unpacking, execution units
// |||
// |||
// |||||||||
// |||
// |||
// ||||||||||||
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU // Hazard unit for FPU: determines if any forwarding or stalls are needed
// - determines if any forwarding or stalls are needed fhazard fhazard(.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E,
fhazard fhazard(.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E, .FRegWriteE, .FRegWriteM, .FRegWriteW, .RdE, .RdM, .RdW, .FResSelM, .FRegWriteE, .FRegWriteM, .FRegWriteW, .RdE, .RdM, .RdW, .FResSelM,
.XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE); .XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs // forwarding muxs
@ -207,7 +197,7 @@ module fpu (
mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE); mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE); mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
// Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
generate generate
if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}; if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
else if(`FPSIZES == 2) else if(`FPSIZES == 2)
@ -218,11 +208,11 @@ module fpu (
{{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)},
{2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
endgenerate endgenerate
assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
// Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions // For add and subtract, Z comes from second source operand
// Force Z to be 0 for multiply instructions
generate generate
if(`FPSIZES == 1) assign BoxedZeroE = 0; if(`FPSIZES == 1) assign BoxedZeroE = 0;
else if(`FPSIZES == 2) else if(`FPSIZES == 2)
@ -233,12 +223,10 @@ module fpu (
{{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}, {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
endgenerate endgenerate
assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE); // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity
// unpack unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Subnorm, Norm, Zero, Infifnity)
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
@ -246,61 +234,38 @@ module fpu (
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE)); .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE));
// fused multiply add // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
// - fadd/fsub fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
// - fmul .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE),
// - fmadd/fnmadd/fmsub/fnmsub .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE),
.Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
.OpCtrl(OpCtrlE),
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
.Sm(SmE),
.InvA(InvAE), .SCnt(SCntE),
.ASticky(FmaAStickyE));
// divide and squareroot // divide and square root: fdiv, fsqrt, optionally integer division
// - fdiv
// - fsqrt
// *** add other opperations
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM, .FIntDivResultM /*, .DivDone(DivDoneM) */); .QmM, .FIntDivResultM);
// // compare: fmin/fmax, flt/fle/feq
// compare
// - fmin/fmax
// - flt/fle/feq
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// sign injection
// - fsgnj/fsgnjx/fsgnjn // sign injection: fsgnj/fsgnjx/fsgnjn
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE)); fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
// classify // classify: fclass
// - fclass
fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// convert // convert: fcvt.*.*
// - fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE),
.ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
.LzcIn(CvtLzcInE));
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM); // NaN Box SrcA to convert integer to requested FP size
// NaN Block SrcA
generate generate
if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}; if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
else if(`FPSIZES == 2) else if(`FPSIZES == 2)
@ -317,8 +282,6 @@ module fpu (
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU // select the result that may be written to the integer register - to IEU
logic [`FLEN-1:0] SgnExtXE;
generate generate
if(`FPSIZES == 1) if(`FPSIZES == 1)
assign SgnExtXE = XE; assign SgnExtXE = XE;
@ -328,20 +291,18 @@ module fpu (
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]}, mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]},
{{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]}, {{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]}, {{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]},
XE, FmtE, SgnExtXE); // NaN boxing zeroes XE, FmtE, SgnExtXE);
endgenerate endgenerate
if (`FLEN>`XLEN) if (`FLEN>`XLEN)
assign IntSrcXE = SgnExtXE[`XLEN-1:0]; assign IntSrcXE = SgnExtXE[`XLEN-1:0];
else else
assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE}; assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
// *** make sure the fpu matches the chapter diagram
// E/M pipe registers // E/M pipe registers
assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE); // Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources // Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE);
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM); flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM); flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
@ -359,47 +320,31 @@ module fpu (
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE}, {CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE},
{CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM}); {CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM});
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
// BEGIN MEMORY STAGE
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// ||| ||| // Memory Stage: postprocessor and result muxes
// |||||| ||||||
// ||| ||| ||| |||
// ||| ||||| |||
// ||| ||| |||
// ||| |||
// ||| |||
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */ .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
.FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
.CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged // FPU flag selection - to privileged
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(`FLEN) FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM); mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
// M/W pipe registers // M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW); flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
// BEGIN WRITEBACK STAGE
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// ||| ||| // Writeback Stage: result mux
// ||| |||
// ||| ||| |||
// ||| ||||| |||
// ||| ||| ||| |||
// |||||| ||||||
// ||| |||
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// select the result to be written to the FP register // select the result to be written to the FP register