Slight modification to fpdiv - a bug still remains in batch vs. non-batch

This commit is contained in:
James E. Stine 2021-07-20 01:47:46 -04:00
parent 365485bd8b
commit 12e09a7ace
2 changed files with 316 additions and 314 deletions

View File

@ -25,23 +25,23 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpu ( module fpu (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, input logic [31:0] InstrD,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW, input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW, input logic FlushE, FlushM, FlushW,
input logic [4:0] RdE, RdM, RdW, input logic [4:0] RdE, RdM, RdW,
output logic FRegWriteM, output logic FRegWriteM,
output logic FStallD, // Stall the decode stage output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, output logic [`XLEN-1:0] FIntResM,
output logic FDivBusyE, // Is the divison/sqrt unit busy output logic FDivBusyE, // Is the divison/sqrt unit busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM); // FPU result output logic [4:0] SetFflagsM); // FPU result
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
// *** folder at same level of src for tests fpu tests // *** folder at same level of src for tests fpu tests
@ -50,254 +50,256 @@ module fpu (
generate generate
if (`F_SUPPORTED | `D_SUPPORTED) begin if (`F_SUPPORTED | `D_SUPPORTED) begin
// control logic signal instantiation // control logic signal instantiation
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM; logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E; logic [4:0] Adr1E, Adr2E, Adr3E;
// regfile signals // regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`XLEN-1:0] FSrcXMAligned; logic [`XLEN-1:0] FSrcXMAligned;
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding) logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals // unpacking signals
logic XSgnE, YSgnE, ZSgnE; logic XSgnE, YSgnE, ZSgnE;
logic [10:0] XExpE, YExpE, ZExpE; logic [10:0] XExpE, YExpE, ZExpE;
logic [51:0] XFracE, YFracE, ZFracE; logic [51:0] XFracE, YFracE, ZFracE;
logic XAssumed1E, YAssumed1E, ZAssumed1E; logic XAssumed1E, YAssumed1E, ZAssumed1E;
logic XNaNE, YNaNE, ZNaNE; logic XNaNE, YNaNE, ZNaNE;
logic XSNaNE, YSNaNE, ZSNaNE; logic XSNaNE, YSNaNE, ZSNaNE;
logic XDenormE, YDenormE, ZDenormE; logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE; logic XZeroE, YZeroE, ZZeroE;
logic [10:0] BiasE; logic [10:0] BiasE;
logic XInfE, YInfE, ZInfE; logic XInfE, YInfE, ZInfE;
logic XExpMaxE; logic XExpMaxE;
logic XNormE; logic XNormE;
logic XSgnM, YSgnM, ZSgnM; logic XSgnM, YSgnM, ZSgnM;
logic [10:0] XExpM, YExpM, ZExpM; logic [10:0] XExpM, YExpM, ZExpM;
logic [51:0] XFracM, YFracM, ZFracM; logic [51:0] XFracM, YFracM, ZFracM;
logic XNaNM, YNaNM, ZNaNM; logic XNaNM, YNaNM, ZNaNM;
logic XSNaNM, YSNaNM, ZSNaNM; logic XSNaNM, YSNaNM, ZSNaNM;
logic XZeroM, YZeroM, ZZeroM; logic XZeroM, YZeroM, ZZeroM;
logic XInfM, YInfM, ZInfM; logic XInfM, YInfM, ZInfM;
// div/sqrt signals // div/sqrt signals
logic [63:0] FDivResultM, FDivResultW; logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
logic FDivSqrtDoneE; logic FDivSqrtDoneE;
logic [63:0] DivInput1E, DivInput2E; logic [63:0] DivInput1E, DivInput2E;
logic HoldInputs; // keep forwarded inputs arround durring division logic HoldInputs; // keep forwarded inputs arround durring division
//fpu signals //fpu signals
logic [63:0] FMAResM, FMAResW; logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW; logic [4:0] FMAFlgM, FMAFlgW;
logic [63:0] ReadResW;
logic [63:0] ReadResW;
// add/cvt signals
// add/cvt signals logic [63:0] FAddResM, FAddResW;
logic [63:0] FAddResM, FAddResW; logic [4:0] FAddFlgM, FAddFlgW;
logic [4:0] FAddFlgM, FAddFlgW; logic [63:0] CvtResE, CvtResM;
logic [63:0] CvtResE, CvtResM; logic [4:0] CvtFlgE, CvtFlgM;
logic [4:0] CvtFlgE, CvtFlgM;
// cmp signals
// cmp signals logic CmpNVE, CmpNVM, CmpNVW;
logic CmpNVE, CmpNVM, CmpNVW; logic [63:0] CmpResE, CmpResM, CmpResW;
logic [63:0] CmpResE, CmpResM, CmpResW;
// fsgn signals
// fsgn signals logic [63:0] SgnResE, SgnResM;
logic [63:0] SgnResE, SgnResM; logic SgnNVE, SgnNVM, SgnNVW;
logic SgnNVE, SgnNVM, SgnNVW; logic [63:0] FResM, FResW;
logic [63:0] FResM, FResW; logic [4:0] FFlgM, FFlgW;
logic [4:0] FFlgM, FFlgW;
// instantiation of W stage regfile signals
// instantiation of W stage regfile signals logic [63:0] AlignedSrcAM;
logic [63:0] AlignedSrcAM;
// classify signals
// classify signals logic [63:0] ClassResE, ClassResM;
logic [63:0] ClassResE, ClassResM;
// 64-bit FPU result
// 64-bit FPU result logic [63:0] FPUResultW;
logic [63:0] FPUResultW; logic [4:0] FPUFlagsW;
logic [4:0] FPUFlagsW;
//DECODE STAGE
//DECODE STAGE // top-level controller for FPU
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
// top-level controller for FPU .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
.FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD); // regfile instantiation
fregfile fregfile (clk, reset, FRegWriteW,
// regfile instantiation InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
fregfile fregfile (clk, reset, FRegWriteW, FPUResultW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, FRD1D, FRD2D, FRD3D);
FPUResultW,
FRD1D, FRD2D, FRD3D); //*****************
// D/E pipe registers
//***************** //*****************
// D/E pipe registers flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
//***************** flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); {Adr1E, Adr2E, Adr3E});
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{Adr1E, Adr2E, Adr3E}); {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); //EXECUTION STAGE
// Hazard unit for FPU
//EXECUTION STAGE fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
// Hazard unit for FPU
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
.FForwardXE, .FForwardYE, .FForwardZE); .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs // forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE); mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE);
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE),
.FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE,
.ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE,
.XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE,
.XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// first of two-stage instance of floating-point fused multiply-add unit // first of two-stage instance of floating-point fused multiply-add unit
fma fma (.clk, .reset, .FlushM, .StallM, fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE,
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM,
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
// first and only instance of floating-point divider .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
logic fpdivClk; .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
clockgater fpdivclkg(.E(FDivStartE), // first and only instance of floating-point divider
.SE(1'b0), logic fpdivClk;
.CLK(clk),
.ECLK(fpdivClk)); clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0),
// capture the inputs for div/sqrt .CLK(clk),
flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), .ECLK(fpdivClk));
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs)); // capture the inputs for div/sqrt
flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
.en(1'b1), .clear(FDivSqrtDoneE), .en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(HoldInputs)); .reset(reset), .clk(HoldInputs));
//*** add round to nearest ties to max magnitude flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), .P(~FmtE), .FDivBusyE, .HoldInputs, .en(1'b1), .clear(FDivSqrtDoneE),
.OvEn(1'b1), .UnEn(1'b1), .start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); .reset(reset), .clk(HoldInputs));
//*** add round to nearest ties to max magnitude
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.P(~FmtE), .FDivBusyE, .HoldInputs,
.OvEn(1'b1), .UnEn(1'b1),
.start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM));
// .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, // .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
// .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, // .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
// .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); // .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
// assign FDivBusyE = 0; // assign FDivBusyE = 0;
// first of two-stage instance of floating-point add/cvt unit
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, // first of two-stage instance of floating-point add/cvt unit
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
// first and only instance of floating-point comparator
fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); // first and only instance of floating-point comparator
fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE,
// first and only instance of floating-point sign converter .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE,
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
// first and only instance of floating-point classify unit // first and only instance of floating-point sign converter
fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
// first and only instance of floating-point classify unit
fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
// output for store instructions fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
assign FWriteDataE = FSrcYE[`XLEN-1:0];
// output for store instructions
//***************** assign FWriteDataE = FSrcYE[`XLEN-1:0];
// E/M pipe registers
//***************** //*****************
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); // E/M pipe registers
// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); //*****************
// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM}); // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM}); // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM}); flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM});
flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM});
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM});
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM, flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
//BEGIN MEMORY STAGE //BEGIN MEMORY STAGE
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
// mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned); // mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// Align SrcA to MSB when single precicion // Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM);
//***************** //*****************
// M/W pipe registers // M/W pipe registers
//***************** //*****************
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); {FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
//#########################################
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, // BEGIN WRITEBACK STAGE
{FRegWriteM, FResultSelM, FmtM, FWriteIntM}, //#########################################
{FRegWriteW, FResultSelW, FmtW, FWriteIntW}); mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW);
//#########################################
// BEGIN WRITEBACK STAGE
//######################################### end else begin // no F_SUPPORTED; tie outputs low
assign FStallD = 0;
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); assign FWriteIntE = 0;
mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); assign FWriteIntM = 0;
assign FWriteIntW = 0;
assign FWriteDataE = 0;
end else begin // no F_SUPPORTED; tie outputs low assign FIntResM = 0;
assign FStallD = 0; assign FDivBusyE = 0;
assign FWriteIntE = 0; assign IllegalFPUInstrD = 1;
assign FWriteIntM = 0; assign SetFflagsM = 0;
assign FWriteIntW = 0; end
assign FWriteDataE = 0;
assign FIntResM = 0;
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
end
endgenerate endgenerate
endmodule // fpu endmodule // fpu

View File

@ -6,7 +6,7 @@ module fsm (done, load_rega, load_regb, load_regc,
input clk; input clk;
input reset; input reset;
input start; input start;
// input error; // input error;
input op_type; input op_type;
//***can use divbusy insted of holdinputs //***can use divbusy insted of holdinputs
output done; output done;
@ -113,8 +113,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S1: S1:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -129,8 +129,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S2: // iteration 1 S2: // iteration 1
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b1; load_regb = 1'b1;
load_regc = 1'b0; load_regc = 1'b0;
@ -145,8 +145,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S3: S3:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -161,8 +161,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S4: // iteration 2 S4: // iteration 2
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b1; load_regb = 1'b1;
load_regc = 1'b0; load_regc = 1'b0;
@ -177,8 +177,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S5: S5:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -193,8 +193,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S6: // iteration 3 S6: // iteration 3
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b1; load_regb = 1'b1;
load_regc = 1'b0; load_regc = 1'b0;
@ -209,8 +209,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S7: S7:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -225,8 +225,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S8: // q,qm,qp S8: // q,qm,qp
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -241,8 +241,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S9: // rem S9: // rem
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -257,8 +257,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S10: // done S10: // done
begin begin
done = 1'b1; done = 1'b1;
divBusy = 1'b0; divBusy = 1'b0;
holdInputs = 1'b0; holdInputs = 1'b0;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -273,8 +273,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S13: // start of sqrt path S13: // start of sqrt path
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -289,8 +289,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S14: S14:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -305,8 +305,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S15: // iteration 1 S15: // iteration 1
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b1; load_regb = 1'b1;
load_regc = 1'b0; load_regc = 1'b0;
@ -321,8 +321,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S16: S16:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -337,8 +337,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S17: S17:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -353,8 +353,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S18: // iteration 2 S18: // iteration 2
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b1; load_regb = 1'b1;
load_regc = 1'b0; load_regc = 1'b0;
@ -369,8 +369,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S19: S19:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -385,8 +385,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S20: S20:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -401,8 +401,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S21: // iteration 3 S21: // iteration 3
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b1; load_regb = 1'b1;
load_regc = 1'b0; load_regc = 1'b0;
@ -417,8 +417,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S22: S22:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -433,8 +433,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S23: S23:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b1; load_rega = 1'b1;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b1; load_regc = 1'b1;
@ -449,8 +449,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S24: // q,qm,qp S24: // q,qm,qp
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -465,8 +465,8 @@ module fsm (done, load_rega, load_regb, load_regc,
S25: // rem S25: // rem
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b1; divBusy = 1'b1;
holdInputs = 1'b1; holdInputs = 1'b1;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -476,13 +476,13 @@ module fsm (done, load_rega, load_regb, load_regc,
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b110; sel_muxb = 3'b110;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE = S26; NEXT_STATE = S27;
end end
S26: // done S26: // done
begin begin
done = 1'b1; done = 1'b1;
divBusy = 1'b0; divBusy = 1'b0;
holdInputs = 1'b0; holdInputs = 1'b0;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;
@ -497,8 +497,8 @@ module fsm (done, load_rega, load_regb, load_regc,
default: default:
begin begin
done = 1'b0; done = 1'b0;
divBusy = 1'b0; divBusy = 1'b0;
holdInputs = 1'b0; holdInputs = 1'b0;
load_rega = 1'b0; load_rega = 1'b0;
load_regb = 1'b0; load_regb = 1'b0;
load_regc = 1'b0; load_regc = 1'b0;