mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Update top level parameterized. Simulation slowed down to 4.5 minutes.
This commit is contained in:
parent
8cf38b28aa
commit
b517a96261
@ -25,9 +25,8 @@
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fctrl (
|
||||
module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
// input control signals
|
||||
@ -49,7 +48,7 @@ module fctrl (
|
||||
// opperation mux selections
|
||||
output logic FCvtIntE, FCvtIntW, // convert to integer opperation
|
||||
output logic [2:0] FrmM, // FP rounding mode
|
||||
output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic [P.FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component
|
||||
output logic FpLoadStoreM, // FP load or store instruction
|
||||
output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit
|
||||
@ -74,7 +73,7 @@ module fctrl (
|
||||
logic [1:0] PostProcSelD; // select result in the post processing unit
|
||||
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
|
||||
logic [2:0] FrmD, FrmE; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtD; // FP format
|
||||
logic [P.FMTBITS-1:0] FmtD; // FP format
|
||||
logic [1:0] Fmt, Fmt2; // format - before possible reduction
|
||||
logic SupportedFmt; // is the format supported
|
||||
logic SupportedFmt2; // is the source format supported for fp -> fp
|
||||
@ -84,10 +83,10 @@ module fctrl (
|
||||
assign Fmt = Funct7D[1:0];
|
||||
assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp
|
||||
|
||||
assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) |
|
||||
(Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED));
|
||||
assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) |
|
||||
(Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED));
|
||||
assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & P.D_SUPPORTED) |
|
||||
(Fmt == 2'b10 & P.ZFH_SUPPORTED) | (Fmt == 2'b11 & P.Q_SUPPORTED));
|
||||
assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & P.D_SUPPORTED) |
|
||||
(Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED));
|
||||
|
||||
// decode the instruction
|
||||
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt
|
||||
@ -102,15 +101,15 @@ module fctrl (
|
||||
case(OpD)
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw
|
||||
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
|
||||
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
|
||||
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw
|
||||
3'b011: if (`D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
|
||||
3'b100: if (`Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
|
||||
3'b001: if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub
|
||||
@ -227,14 +226,14 @@ module fctrl (
|
||||
// 10 - half
|
||||
// 11 - quad
|
||||
|
||||
if (`FPSIZES == 1)
|
||||
if (P.FPSIZES == 1)
|
||||
assign FmtD = 0;
|
||||
else if (`FPSIZES == 2)begin
|
||||
else if (P.FPSIZES == 2)begin
|
||||
logic [1:0] FmtTmp;
|
||||
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
|
||||
assign FmtD = (`FMT == FmtTmp);
|
||||
assign FmtD = (P.FMT == FmtTmp);
|
||||
end
|
||||
else if (`FPSIZES == 3|`FPSIZES == 4)
|
||||
else if (P.FPSIZES == 3|P.FPSIZES == 4)
|
||||
assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
|
||||
// Enables indicate that a source register is used and may need stalls. Also indicate special cases for infinity or NaN.
|
||||
@ -313,7 +312,7 @@ module fctrl (
|
||||
assign Adr3D = InstrD[31:27];
|
||||
|
||||
// D/E pipleine register
|
||||
flopenrc #(13+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
flopenrc #(13+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE});
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
|
||||
@ -321,11 +320,11 @@ module fctrl (
|
||||
flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});
|
||||
|
||||
// Integer division on FPU divider
|
||||
if (`M_SUPPORTED & `IDIV_ON_FPU) assign IDivStartE = IntDivE;
|
||||
if (P.M_SUPPORTED & P.IDIV_ON_FPU) assign IDivStartE = IntDivE;
|
||||
else assign IDivStartE = 0;
|
||||
|
||||
// E/M pipleine register
|
||||
flopenrc #(13+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM});
|
||||
|
||||
|
210
src/fpu/fpu.sv
210
src/fpu/fpu.sv
@ -26,9 +26,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpu (
|
||||
module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
// Hazards
|
||||
@ -44,7 +42,7 @@ module fpu (
|
||||
// Execute stage
|
||||
input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations
|
||||
input logic IntDivE, W64E, // Integer division on FPU
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
|
||||
input logic [4:0] RdE, // which FP register to write to (from IEU)
|
||||
output logic FWriteIntE, // integer register write enable (to IEU)
|
||||
output logic FCvtIntE, // Convert to int (to IEU)
|
||||
@ -53,16 +51,16 @@ module fpu (
|
||||
input logic [4:0] RdM, // which FP register to write to (from IEU)
|
||||
output logic FRegWriteM, // FP register write enable (to privileged unit)
|
||||
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
|
||||
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic [P.FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
|
||||
output logic [P.XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU)
|
||||
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
|
||||
// Writeback stage
|
||||
input logic [4:0] RdW, // which FP register to write to (from IEU)
|
||||
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
input logic [P.FLEN-1:0] ReadDataW, // Read data (from LSU)
|
||||
output logic [P.XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
|
||||
output logic FCvtIntW, // select FCvtIntRes (to IEU)
|
||||
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
|
||||
output logic [P.XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
|
||||
);
|
||||
|
||||
// RISC-V FPU specifics:
|
||||
@ -72,7 +70,7 @@ module fpu (
|
||||
// control signals
|
||||
logic FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmM; // FP rounding mode
|
||||
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic [P.FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic FDivStartE, IDivStartE; // Start division or squareroot
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
|
||||
@ -86,20 +84,20 @@ module fpu (
|
||||
logic FRegWriteE; // Write floating-point register
|
||||
|
||||
// regfile signals
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
|
||||
logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [P.FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [P.FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
|
||||
logic [P.XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [P.FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
|
||||
logic [P.FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XsE, YsE, ZsE; // input's sign - execute stage
|
||||
logic XsM, YsM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage
|
||||
logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage
|
||||
logic [P.NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
|
||||
logic [P.NE-1:0] ZeM; // input's exponent - memory stage
|
||||
logic [P.NF:0] XmE, YmE, ZmE; // input's significand - execute stage
|
||||
logic [P.NF:0] XmM, YmM, ZmM; // input's significand - memory stage
|
||||
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
|
||||
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
@ -110,56 +108,56 @@ module fpu (
|
||||
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
|
||||
logic [P.FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
|
||||
|
||||
// Fma Signals
|
||||
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
|
||||
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
|
||||
logic [3*`NF+3:0] SmE, SmM; // Sum significand
|
||||
logic [3*P.NF+3:0] SmE, SmM; // Sum significand
|
||||
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
|
||||
logic [`NE+1:0] SeE,SeM; // Sum exponent
|
||||
logic [P.NE+1:0] SeE,SeM; // Sum exponent
|
||||
logic InvAE, InvAM; // Invert addend
|
||||
logic AsE, AsM; // Addend sign
|
||||
logic PsE, PsM; // Product sign
|
||||
logic SsE, SsM; // Sum sign
|
||||
logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
|
||||
logic [$clog2(3*P.NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
|
||||
|
||||
// Cvt Signals
|
||||
logic [`NE:0] CeE, CeM; // convert intermediate expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic [P.NE:0] CeE, CeM; // convert intermediate expoent
|
||||
logic [P.LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
|
||||
logic CsE, CsM; // convert result sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||
logic [P.CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||
|
||||
// divide signals
|
||||
logic [`DIVb:0] QmM; // fdivsqrt signifcand
|
||||
logic [`NE+1:0] QeM; // fdivsqrt exponent
|
||||
logic [P.DIVb:0] QmM; // fdivsqrt signifcand
|
||||
logic [P.NE+1:0] QeM; // fdivsqrt exponent
|
||||
logic DivStickyM; // fdivsqrt sticky bit
|
||||
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
|
||||
logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||
|
||||
// result and flag signals
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
|
||||
logic [P.XLEN-1:0] ClassResE; // classify result
|
||||
logic [P.FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
|
||||
logic [P.XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
|
||||
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
|
||||
logic [P.FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [P.XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
|
||||
logic [P.FLEN-1:0] PostProcResM; // Postprocessor output
|
||||
logic [4:0] PostProcFlgM; // Postprocessor flags
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
|
||||
logic [P.FLEN-1:0] FpResM, FpResW; // FPU preliminary result
|
||||
logic [P.FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register
|
||||
|
||||
// other signals
|
||||
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
|
||||
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
|
||||
logic [P.FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
|
||||
logic [P.FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
|
||||
logic [P.FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
|
||||
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
|
||||
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
|
||||
logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
|
||||
logic mvsgn; // sign bit for extending move
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -167,7 +165,7 @@ module fpu (
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate FP control signals
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
|
||||
fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
|
||||
.Funct3E, .IntDivE, .InstrD,
|
||||
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
|
||||
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
|
||||
@ -183,9 +181,9 @@ module fpu (
|
||||
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
|
||||
|
||||
// D/E pipeline registers
|
||||
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(P.FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(P.FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(P.FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Execute Stage: hazards, forwarding, unpacking, execution units
|
||||
@ -197,34 +195,34 @@ module fpu (
|
||||
.XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
|
||||
mux3 #(P.FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
|
||||
mux3 #(P.FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
|
||||
mux3 #(P.FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
|
||||
|
||||
// Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
|
||||
if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)},
|
||||
{{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)},
|
||||
{{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)},
|
||||
{2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
if(P.FPSIZES == 1) assign BoxedOneE = {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)};
|
||||
else if(P.FPSIZES == 2)
|
||||
mux2 #(P.FLEN) fonemux ({{P.FLEN-P.LEN1{1'b1}}, 2'b0, {P.NE1-1{1'b1}}, (P.NF1)'(0)}, {2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
mux4 #(P.FLEN) fonemux ({{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)},
|
||||
{{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)},
|
||||
{{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)},
|
||||
{2'b0, {P.NE-1{1'b1}}, (P.NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
|
||||
assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
|
||||
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
|
||||
mux2 #(P.FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
|
||||
|
||||
// Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
|
||||
// For add and subtract, Z comes from second source operand
|
||||
if(`FPSIZES == 1) assign BoxedZeroE = 0;
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}},
|
||||
{{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}},
|
||||
{{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
|
||||
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
if(P.FPSIZES == 1) assign BoxedZeroE = 0;
|
||||
else if(P.FPSIZES == 2)
|
||||
mux2 #(P.FLEN) fmulzeromux ({{P.FLEN-P.LEN1{1'b1}}, {P.LEN1{1'b0}}}, (P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}},
|
||||
{{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}},
|
||||
{{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}},
|
||||
(P.FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
|
||||
assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
|
||||
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
|
||||
mux3 #(P.FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
|
||||
|
||||
// unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity
|
||||
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
|
||||
@ -266,62 +264,62 @@ module fpu (
|
||||
|
||||
|
||||
// NaN Box SrcA to convert integer to requested FP size
|
||||
if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
|
||||
else if(`FPSIZES == 2)
|
||||
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
|
||||
else if(`FPSIZES == 3 | `FPSIZES == 4)
|
||||
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]},
|
||||
{{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]},
|
||||
{{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]},
|
||||
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE};
|
||||
else if(P.FPSIZES == 2)
|
||||
mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
|
||||
else if(P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
mux4 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]},
|
||||
{{P.FLEN-P.D_LEN{1'b1}}, ForwardedSrcAE[P.D_LEN-1:0]},
|
||||
{{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]},
|
||||
{{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
|
||||
// select a result that may be written to the FP register
|
||||
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
|
||||
mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
|
||||
|
||||
// select the result that may be written to the integer register with fmv - to IEU
|
||||
if(`FPSIZES == 1) begin
|
||||
assign mvsgn = XE[`FLEN-1];
|
||||
if(P.FPSIZES == 1) begin
|
||||
assign mvsgn = XE[P.FLEN-1];
|
||||
assign SgnExtXE = XE;
|
||||
end else if(`FPSIZES == 2) begin
|
||||
mux2 #(1) sgnmux (XE[`LEN1-1], XE[`FLEN-1],FmtE, mvsgn);
|
||||
mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{mvsgn}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
|
||||
end else if(`FPSIZES == 3 | `FPSIZES == 4) begin
|
||||
mux4 #(1) sgnmux (XE[`H_LEN-1], XE[`S_LEN-1], XE[`D_LEN-1], XE[`LLEN-1], FmtE, mvsgn);
|
||||
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{mvsgn}}, XE[`H_LEN-1:0]},
|
||||
{{`FLEN-`S_LEN{mvsgn}}, XE[`S_LEN-1:0]},
|
||||
{{`FLEN-`D_LEN{mvsgn}}, XE[`D_LEN-1:0]},
|
||||
end else if(P.FPSIZES == 2) begin
|
||||
mux2 #(1) sgnmux (XE[P.LEN1-1], XE[P.FLEN-1],FmtE, mvsgn);
|
||||
mux2 #(P.FLEN) sgnextmux ({{P.FLEN-P.LEN1{mvsgn}}, XE[P.LEN1-1:0]}, XE, FmtE, SgnExtXE);
|
||||
end else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin
|
||||
mux4 #(1) sgnmux (XE[P.H_LEN-1], XE[P.S_LEN-1], XE[P.D_LEN-1], XE[P.LLEN-1], FmtE, mvsgn);
|
||||
mux4 #(P.FLEN) fmulzeromux ({{P.FLEN-P.H_LEN{mvsgn}}, XE[P.H_LEN-1:0]},
|
||||
{{P.FLEN-P.S_LEN{mvsgn}}, XE[P.S_LEN-1:0]},
|
||||
{{P.FLEN-P.D_LEN{mvsgn}}, XE[P.D_LEN-1:0]},
|
||||
XE, FmtE, SgnExtXE);
|
||||
end
|
||||
|
||||
if (`FLEN>`XLEN)
|
||||
assign IntSrcXE = SgnExtXE[`XLEN-1:0];
|
||||
if (P.FLEN>P.XLEN)
|
||||
assign IntSrcXE = SgnExtXE[P.XLEN-1:0];
|
||||
else
|
||||
assign IntSrcXE = {{`XLEN-`FLEN{mvsgn}}, SgnExtXE};
|
||||
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
|
||||
assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE};
|
||||
mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
|
||||
|
||||
// E/M pipe registers
|
||||
|
||||
// Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
|
||||
assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE);
|
||||
|
||||
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
|
||||
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
|
||||
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenrc #(P.NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
|
||||
flopenrc #(P.NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
|
||||
flopenrc #(P.FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
|
||||
flopenrc #(P.XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(P.FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM,
|
||||
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
|
||||
flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
|
||||
flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
|
||||
flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
|
||||
{FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
|
||||
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
{CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE},
|
||||
{CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM});
|
||||
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
|
||||
flopenrc #(P.FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Memory Stage: postprocessor and result muxes
|
||||
@ -337,18 +335,18 @@ module fpu (
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
|
||||
mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
|
||||
mux2 #(P.FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
|
||||
|
||||
// M/W pipe registers
|
||||
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
|
||||
flopenrc #(P.FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(P.XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(P.XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Writeback Stage: result mux
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// select the result to be written to the FP register
|
||||
mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
|
||||
mux2 #(P.FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
|
||||
|
||||
endmodule // fpu
|
||||
|
@ -315,7 +315,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
// floating point unit
|
||||
if (P.F_SUPPORTED) begin:fpu
|
||||
fpu fpu(
|
||||
fpu #(P) fpu(
|
||||
.clk, .reset,
|
||||
.FRM_REGW, // Rounding mode from CSR
|
||||
.InstrD, // instruction from IFU
|
||||
|
Loading…
Reference in New Issue
Block a user