slight mod to fpdiv - still bug in batch vs. non-batch

This commit is contained in:
James E. Stine 2021-07-20 01:47:46 -04:00
parent 365485bd8b
commit 12e09a7ace
2 changed files with 316 additions and 314 deletions

View File

@ -25,23 +25,23 @@
`include "wally-config.vh"
module fpu (
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [4:0] RdE, RdM, RdW,
output logic FRegWriteM,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [4:0] RdE, RdM, RdW,
output logic FRegWriteM,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM,
output logic FDivBusyE, // Is the divison/sqrt unit busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FDivBusyE, // Is the divison/sqrt unit busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM); // FPU result
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
// *** folder at same level of src for tests fpu tests
@ -50,254 +50,256 @@ module fpu (
generate
if (`F_SUPPORTED | `D_SUPPORTED) begin
// control logic signal instantiation
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E;
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E;
// regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`XLEN-1:0] FSrcXMAligned;
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding)
// regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`XLEN-1:0] FSrcXMAligned;
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE;
logic [10:0] XExpE, YExpE, ZExpE;
logic [51:0] XFracE, YFracE, ZFracE;
logic XAssumed1E, YAssumed1E, ZAssumed1E;
logic XNaNE, YNaNE, ZNaNE;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic [10:0] BiasE;
logic XInfE, YInfE, ZInfE;
logic XExpMaxE;
logic XNormE;
// unpacking signals
logic XSgnE, YSgnE, ZSgnE;
logic [10:0] XExpE, YExpE, ZExpE;
logic [51:0] XFracE, YFracE, ZFracE;
logic XAssumed1E, YAssumed1E, ZAssumed1E;
logic XNaNE, YNaNE, ZNaNE;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic [10:0] BiasE;
logic XInfE, YInfE, ZInfE;
logic XExpMaxE;
logic XNormE;
logic XSgnM, YSgnM, ZSgnM;
logic [10:0] XExpM, YExpM, ZExpM;
logic [51:0] XFracM, YFracM, ZFracM;
logic XNaNM, YNaNM, ZNaNM;
logic XSNaNM, YSNaNM, ZSNaNM;
logic XZeroM, YZeroM, ZZeroM;
logic XInfM, YInfM, ZInfM;
logic XSgnM, YSgnM, ZSgnM;
logic [10:0] XExpM, YExpM, ZExpM;
logic [51:0] XFracM, YFracM, ZFracM;
logic XNaNM, YNaNM, ZNaNM;
logic XSNaNM, YSNaNM, ZSNaNM;
logic XZeroM, YZeroM, ZZeroM;
logic XInfM, YInfM, ZInfM;
// div/sqrt signals
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
logic FDivSqrtDoneE;
logic [63:0] DivInput1E, DivInput2E;
logic HoldInputs; // keep forwarded inputs arround durring division
// div/sqrt signals
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
logic FDivSqrtDoneE;
logic [63:0] DivInput1E, DivInput2E;
logic HoldInputs; // keep forwarded inputs arround durring division
//fpu signals
logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW;
//fpu signals
logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW;
logic [63:0] ReadResW;
logic [63:0] ReadResW;
// add/cvt signals
logic [63:0] FAddResM, FAddResW;
logic [4:0] FAddFlgM, FAddFlgW;
logic [63:0] CvtResE, CvtResM;
logic [4:0] CvtFlgE, CvtFlgM;
// add/cvt signals
logic [63:0] FAddResM, FAddResW;
logic [4:0] FAddFlgM, FAddFlgW;
logic [63:0] CvtResE, CvtResM;
logic [4:0] CvtFlgE, CvtFlgM;
// cmp signals
logic CmpNVE, CmpNVM, CmpNVW;
logic [63:0] CmpResE, CmpResM, CmpResW;
// cmp signals
logic CmpNVE, CmpNVM, CmpNVW;
logic [63:0] CmpResE, CmpResM, CmpResW;
// fsgn signals
logic [63:0] SgnResE, SgnResM;
logic SgnNVE, SgnNVM, SgnNVW;
logic [63:0] FResM, FResW;
logic [4:0] FFlgM, FFlgW;
// fsgn signals
logic [63:0] SgnResE, SgnResM;
logic SgnNVE, SgnNVM, SgnNVW;
logic [63:0] FResM, FResW;
logic [4:0] FFlgM, FFlgW;
// instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM;
// instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM;
// classify signals
logic [63:0] ClassResE, ClassResM;
// classify signals
logic [63:0] ClassResE, ClassResM;
// 64-bit FPU result
logic [63:0] FPUResultW;
logic [4:0] FPUFlagsW;
// 64-bit FPU result
logic [63:0] FPUResultW;
logic [4:0] FPUFlagsW;
//DECODE STAGE
// top-level controller for FPU
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
//DECODE STAGE
// regfile instantiation
fregfile fregfile (clk, reset, FRegWriteW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResultW,
FRD1D, FRD2D, FRD3D);
// top-level controller for FPU
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
//*****************
// D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
// regfile instantiation
fregfile fregfile (clk, reset, FRegWriteW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResultW,
FRD1D, FRD2D, FRD3D);
//EXECUTION STAGE
//*****************
// D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
//EXECUTION STAGE
// Hazard unit for FPU
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
// Hazard unit for FPU
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
.FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE);
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE);
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE),
.FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE,
.ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE,
.XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE,
.XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// first of two-stage instance of floating-point fused multiply-add unit
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .
ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE,
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM,
.YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
// first and only instance of floating-point divider
logic fpdivClk;
// first and only instance of floating-point divider
logic fpdivClk;
clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0),
.CLK(clk),
.ECLK(fpdivClk));
clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0),
.CLK(clk),
.ECLK(fpdivClk));
// capture the inputs for div/sqrt
flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs));
flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs));
//*** add round to nearest ties to max magnitude
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.P(~FmtE), .FDivBusyE, .HoldInputs,
.OvEn(1'b1), .UnEn(1'b1),
.start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM));
// capture the inputs for div/sqrt
flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs));
flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs));
//*** add round to nearest ties to max magnitude
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), .P(~FmtE), .FDivBusyE, .HoldInputs,
.OvEn(1'b1), .UnEn(1'b1), .start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM));
// .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
// .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
// .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
// assign FDivBusyE = 0;
// first of two-stage instance of floating-point add/cvt unit
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
// assign FDivBusyE = 0;
// first and only instance of floating-point comparator
fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
// first of two-stage instance of floating-point add/cvt unit
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
// first and only instance of floating-point sign converter
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
// first and only instance of floating-point comparator
fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE,
.FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE,
.Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
// first and only instance of floating-point classify unit
fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
// first and only instance of floating-point sign converter
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
// first and only instance of floating-point classify unit
fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
// output for store instructions
assign FWriteDataE = FSrcYE[`XLEN-1:0];
//*****************
// E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM});
flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM});
flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM});
flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
//BEGIN MEMORY STAGE
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
// mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM);
//*****************
// M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
//#########################################
// BEGIN WRITEBACK STAGE
//#########################################
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
// output for store instructions
assign FWriteDataE = FSrcYE[`XLEN-1:0];
//*****************
// E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM});
flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM});
flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM});
flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
//BEGIN MEMORY STAGE
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
// mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM);
//*****************
// M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
//#########################################
// BEGIN WRITEBACK STAGE
//#########################################
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
end else begin // no F_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;
assign FWriteIntM = 0;
assign FWriteIntW = 0;
assign FWriteDataE = 0;
assign FIntResM = 0;
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
end
end else begin // no F_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;
assign FWriteIntM = 0;
assign FWriteIntW = 0;
assign FWriteDataE = 0;
assign FIntResM = 0;
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
end
endgenerate
endmodule // fpu

View File

@ -6,7 +6,7 @@ module fsm (done, load_rega, load_regb, load_regc,
input clk;
input reset;
input start;
// input error;
// input error;
input op_type;
//***can use divbusy insted of holdinputs
output done;
@ -113,8 +113,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S1:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -129,8 +129,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S2: // iteration 1
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -145,8 +145,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S3:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -161,8 +161,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S4: // iteration 2
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -177,8 +177,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S5:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -193,8 +193,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S6: // iteration 3
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -209,8 +209,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S7:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -225,8 +225,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S8: // q,qm,qp
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -241,8 +241,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S9: // rem
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -257,8 +257,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S10: // done
begin
done = 1'b1;
divBusy = 1'b0;
holdInputs = 1'b0;
divBusy = 1'b0;
holdInputs = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -273,8 +273,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S13: // start of sqrt path
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -289,8 +289,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S14:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -305,8 +305,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S15: // iteration 1
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -321,8 +321,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S16:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -337,8 +337,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S17:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -353,8 +353,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S18: // iteration 2
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -369,8 +369,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S19:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -385,8 +385,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S20:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -401,8 +401,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S21: // iteration 3
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
@ -417,8 +417,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S22:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -433,8 +433,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S23:
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
@ -449,8 +449,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S24: // q,qm,qp
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -465,8 +465,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S25: // rem
begin
done = 1'b0;
divBusy = 1'b1;
holdInputs = 1'b1;
divBusy = 1'b1;
holdInputs = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -476,13 +476,13 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011;
sel_muxb = 3'b110;
sel_muxr = 1'b1;
NEXT_STATE = S26;
NEXT_STATE = S27;
end
S26: // done
begin
done = 1'b1;
divBusy = 1'b0;
holdInputs = 1'b0;
divBusy = 1'b0;
holdInputs = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
@ -497,8 +497,8 @@ module fsm (done, load_rega, load_regb, load_regc,
default:
begin
done = 1'b0;
divBusy = 1'b0;
holdInputs = 1'b0;
divBusy = 1'b0;
holdInputs = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;