From 12e09a7ace68880dab8feb333b034724400f0a7a Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Tue, 20 Jul 2021 01:47:46 -0400 Subject: [PATCH] slight mod to fpdiv - still bug in batch vs. non-batch --- wally-pipelined/src/fpu/fpu.sv | 524 +++++++++++++++++---------------- wally-pipelined/src/fpu/fsm.sv | 106 +++---- 2 files changed, 316 insertions(+), 314 deletions(-) diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 2d1351ec5..f283f5e4f 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -25,23 +25,23 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic [31:0] InstrD, - input logic [`XLEN-1:0] ReadDataW, // Read data from memory - input logic [`XLEN-1:0] SrcAE, // Integer input being processed - input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg - input logic StallE, StallM, StallW, - input logic FlushE, FlushM, FlushW, - input logic [4:0] RdE, RdM, RdW, - output logic FRegWriteM, - output logic FStallD, // Stall the decode stage - output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory - output logic [`XLEN-1:0] FIntResM, - output logic FDivBusyE, // Is the divison/sqrt unit busy - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode from CSR + input logic [31:0] InstrD, + input logic [`XLEN-1:0] ReadDataW, // Read data from memory + input logic [`XLEN-1:0] SrcAE, // Integer input being processed + input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg + input logic StallE, StallM, StallW, + input logic FlushE, FlushM, FlushW, + input logic [4:0] RdE, RdM, RdW, + output logic FRegWriteM, + output logic FStallD, // Stall the decode stage + output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`XLEN-1:0] FIntResM, + output logic FDivBusyE, // Is the divison/sqrt unit busy + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS // *** folder at same level of src for tests fpu tests @@ -50,254 +50,256 @@ module fpu ( generate if (`F_SUPPORTED | `D_SUPPORTED) begin // control logic signal instantiation - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division - logic FWriteIntD; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; - logic [4:0] Adr1E, Adr2E, Adr3E; - - // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`XLEN-1:0] FSrcXMAligned; - logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding) - - // unpacking signals - logic XSgnE, YSgnE, ZSgnE; - logic [10:0] XExpE, YExpE, ZExpE; - logic [51:0] XFracE, YFracE, ZFracE; - logic XAssumed1E, YAssumed1E, ZAssumed1E; - logic XNaNE, YNaNE, ZNaNE; - logic XSNaNE, YSNaNE, ZSNaNE; - logic XDenormE, YDenormE, ZDenormE; - logic XZeroE, YZeroE, ZZeroE; - logic [10:0] BiasE; - logic XInfE, YInfE, ZInfE; - logic XExpMaxE; - logic XNormE; - - logic XSgnM, YSgnM, ZSgnM; - logic [10:0] XExpM, YExpM, ZExpM; - logic [51:0] XFracM, YFracM, ZFracM; - logic XNaNM, YNaNM, ZNaNM; - logic XSNaNM, YSNaNM, ZSNaNM; - logic XZeroM, YZeroM, ZZeroM; - logic XInfM, YInfM, ZInfM; - - // div/sqrt signals - logic [63:0] FDivResultM, FDivResultW; - logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; - logic FDivSqrtDoneE; - logic [63:0] DivInput1E, DivInput2E; - logic HoldInputs; // keep forwarded inputs arround durring division - - //fpu signals - logic [63:0] FMAResM, FMAResW; - logic [4:0] FMAFlgM, FMAFlgW; - - - logic [63:0] ReadResW; - - // add/cvt signals - logic [63:0] FAddResM, FAddResW; - logic [4:0] FAddFlgM, FAddFlgW; - logic [63:0] CvtResE, CvtResM; - logic [4:0] CvtFlgE, CvtFlgM; - - // cmp signals - logic CmpNVE, CmpNVM, CmpNVW; - logic [63:0] CmpResE, CmpResM, CmpResW; - - // fsgn signals - logic [63:0] SgnResE, SgnResM; - logic SgnNVE, SgnNVM, SgnNVW; - logic [63:0] FResM, FResW; - logic [4:0] FFlgM, FFlgW; - - // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM; - - // classify signals - logic [63:0] ClassResE, ClassResM; - - // 64-bit FPU result - logic [63:0] FPUResultW; - logic [4:0] FPUFlagsW; - - - //DECODE STAGE - - // top-level controller for FPU - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), - .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // regfile instantiation - fregfile fregfile (clk, reset, FRegWriteW, - InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResultW, - FRD1D, FRD2D, FRD3D); - - //***************** - // D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); - - - //EXECUTION STAGE - - // Hazard unit for FPU - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, + logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD; // Write to integer register + logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal + logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [4:0] Adr1E, Adr2E, Adr3E; + + // regfile signals + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`XLEN-1:0] FSrcXMAligned; + logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding) + + // unpacking signals + logic XSgnE, YSgnE, ZSgnE; + logic [10:0] XExpE, YExpE, ZExpE; + logic [51:0] XFracE, YFracE, ZFracE; + logic XAssumed1E, YAssumed1E, ZAssumed1E; + logic XNaNE, YNaNE, ZNaNE; + logic XSNaNE, YSNaNE, ZSNaNE; + logic XDenormE, YDenormE, ZDenormE; + logic XZeroE, YZeroE, ZZeroE; + logic [10:0] BiasE; + logic XInfE, YInfE, ZInfE; + logic XExpMaxE; + logic XNormE; + + logic XSgnM, YSgnM, ZSgnM; + logic [10:0] XExpM, YExpM, ZExpM; + logic [51:0] XFracM, YFracM, ZFracM; + logic XNaNM, YNaNM, ZNaNM; + logic XSNaNM, YSNaNM, ZSNaNM; + logic XZeroM, YZeroM, ZZeroM; + logic XInfM, YInfM, ZInfM; + + // div/sqrt signals + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; + logic [63:0] DivInput1E, DivInput2E; + logic HoldInputs; // keep forwarded inputs arround durring division + + //fpu signals + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; + + logic [63:0] ReadResW; + + // add/cvt signals + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; + logic [63:0] CvtResE, CvtResM; + logic [4:0] CvtFlgE, CvtFlgM; + + // cmp signals + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; + + // fsgn signals + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; + logic [63:0] FResM, FResW; + logic [4:0] FFlgM, FFlgW; + + // instantiation of W stage regfile signals + logic [63:0] AlignedSrcAM; + + // classify signals + logic [63:0] ClassResE, ClassResM; + + // 64-bit FPU result + logic [63:0] FPUResultW; + logic [4:0] FPUFlagsW; + + //DECODE STAGE + + // top-level controller for FPU + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); + + // regfile instantiation + fregfile fregfile (clk, reset, FRegWriteW, + InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, + FPUResultW, + FRD1D, FRD2D, FRD3D); + + //***************** + // D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); + + //EXECUTION STAGE + + // Hazard unit for FPU + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); - mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); - mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE); - - unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); + mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); + mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE); + + unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), + .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, + .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, + .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, + .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); // first of two-stage instance of floating-point fused multiply-add unit - fma fma (.clk, .reset, .FlushM, .StallM, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, - .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, - .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), - .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); - - // first and only instance of floating-point divider - logic fpdivClk; - - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(fpdivClk)); - - // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(HoldInputs)); - flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), - .en(1'b1), .clear(FDivSqrtDoneE), - .reset(reset), .clk(HoldInputs)); - //*** add round to nearest ties to max magnitude - fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), .P(~FmtE), .FDivBusyE, .HoldInputs, - .OvEn(1'b1), .UnEn(1'b1), .start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); + fma fma (.clk, .reset, .FlushM, .StallM, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, . + ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, + .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, + .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, + .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, + .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), + .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); + + // first and only instance of floating-point divider + logic fpdivClk; + + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(fpdivClk)); + + // capture the inputs for div/sqrt + flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(HoldInputs)); + flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), + .en(1'b1), .clear(FDivSqrtDoneE), + .reset(reset), .clk(HoldInputs)); + //*** add round to nearest ties to max magnitude + fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + .P(~FmtE), .FDivBusyE, .HoldInputs, + .OvEn(1'b1), .UnEn(1'b1), + .start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); + // .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, // .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, // .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - // assign FDivBusyE = 0; - // first of two-stage instance of floating-point add/cvt unit - faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, - .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); - - // first and only instance of floating-point comparator - fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); - - // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); - - // first and only instance of floating-point classify unit - fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); - - - fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); - - // output for store instructions - assign FWriteDataE = FSrcYE[`XLEN-1:0]; - - //***************** - // E/M pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); - // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); - flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM}); - flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM}); - flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM}); - flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, - {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, - {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - - - - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); - flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - - flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); - flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); - flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); - - flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); - - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - //BEGIN MEMORY STAGE - mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); - mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); - - // mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned); - mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); - mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); - - //***************** - // M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - - flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); - - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); - - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - - flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, - {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); - - //######################################### - // BEGIN WRITEBACK STAGE - //######################################### - - mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); - mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); - - - end else begin // no F_SUPPORTED; tie outputs low - assign FStallD = 0; - assign FWriteIntE = 0; - assign FWriteIntM = 0; - assign FWriteIntW = 0; - assign FWriteDataE = 0; - assign FIntResM = 0; - assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; - assign SetFflagsM = 0; - end + // assign FDivBusyE = 0; + + // first of two-stage instance of floating-point add/cvt unit + faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, + .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); + + // first and only instance of floating-point comparator + fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, + .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, + .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); + + // first and only instance of floating-point sign converter + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); + + // first and only instance of floating-point classify unit + fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); + + fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); + + // output for store instructions + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + + //***************** + // E/M pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); + // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); + // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); + flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM}); + flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM}); + flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM}); + flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, + {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); + + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); + + flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); + flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); + + flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, + {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); + + //BEGIN MEMORY STAGE + mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); + mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); + + // mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned); + mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); + mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); + + //***************** + // M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, + {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); + + //######################################### + // BEGIN WRITEBACK STAGE + //######################################### + mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); + mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); + + + end else begin // no F_SUPPORTED; tie outputs low + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; + end endgenerate - + endmodule // fpu diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv index 0e15f5534..8991fb71d 100755 --- a/wally-pipelined/src/fpu/fsm.sv +++ b/wally-pipelined/src/fpu/fsm.sv @@ -6,7 +6,7 @@ module fsm (done, load_rega, load_regb, load_regc, input clk; input reset; input start; -// input error; + // input error; input op_type; //***can use divbusy insted of holdinputs output done; @@ -113,8 +113,8 @@ module fsm (done, load_rega, load_regb, load_regc, S1: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -129,8 +129,8 @@ module fsm (done, load_rega, load_regb, load_regc, S2: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -145,8 +145,8 @@ module fsm (done, load_rega, load_regb, load_regc, S3: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -161,8 +161,8 @@ module fsm (done, load_rega, load_regb, load_regc, S4: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -177,8 +177,8 @@ module fsm (done, load_rega, load_regb, load_regc, S5: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -193,8 +193,8 @@ module fsm (done, load_rega, load_regb, load_regc, S6: // iteration 3 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -209,8 +209,8 @@ module fsm (done, load_rega, load_regb, load_regc, S7: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -225,8 +225,8 @@ module fsm (done, load_rega, load_regb, load_regc, S8: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -241,8 +241,8 @@ module fsm (done, load_rega, load_regb, load_regc, S9: // rem begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -257,8 +257,8 @@ module fsm (done, load_rega, load_regb, load_regc, S10: // done begin done = 1'b1; - divBusy = 1'b0; - holdInputs = 1'b0; + divBusy = 1'b0; + holdInputs = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -273,8 +273,8 @@ module fsm (done, load_rega, load_regb, load_regc, S13: // start of sqrt path begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -289,8 +289,8 @@ module fsm (done, load_rega, load_regb, load_regc, S14: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -305,8 +305,8 @@ module fsm (done, load_rega, load_regb, load_regc, S15: // iteration 1 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -321,8 +321,8 @@ module fsm (done, load_rega, load_regb, load_regc, S16: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -337,8 +337,8 @@ module fsm (done, load_rega, load_regb, load_regc, S17: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -353,8 +353,8 @@ module fsm (done, load_rega, load_regb, load_regc, S18: // iteration 2 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -369,8 +369,8 @@ module fsm (done, load_rega, load_regb, load_regc, S19: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -385,8 +385,8 @@ module fsm (done, load_rega, load_regb, load_regc, S20: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -401,8 +401,8 @@ module fsm (done, load_rega, load_regb, load_regc, S21: // iteration 3 begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -417,8 +417,8 @@ module fsm (done, load_rega, load_regb, load_regc, S22: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -433,8 +433,8 @@ module fsm (done, load_rega, load_regb, load_regc, S23: begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -449,8 +449,8 @@ module fsm (done, load_rega, load_regb, load_regc, S24: // q,qm,qp begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -465,8 +465,8 @@ module fsm (done, load_rega, load_regb, load_regc, S25: // rem begin done = 1'b0; - divBusy = 1'b1; - holdInputs = 1'b1; + divBusy = 1'b1; + holdInputs = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -476,13 +476,13 @@ module fsm (done, load_rega, load_regb, load_regc, sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - NEXT_STATE = S26; - end + NEXT_STATE = S27; + end S26: // done begin done = 1'b1; - divBusy = 1'b0; - holdInputs = 1'b0; + divBusy = 1'b0; + holdInputs = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -497,8 +497,8 @@ module fsm (done, load_rega, load_regb, load_regc, default: begin done = 1'b0; - divBusy = 1'b0; - holdInputs = 1'b0; + divBusy = 1'b0; + holdInputs = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0;