/////////////////////////////////////////// // // Written: Katherine Parry, Bret Mathis // Modified: 6/23/2021 // // Purpose: FPU // // A component of the Wally configurable RISC-V project. // // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software // is furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////// `include "wally-config.vh" module fpu ( input logic clk, input logic reset, input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [31:0] InstrD, input logic [`XLEN-1:0] ReadDataW, // Read data from memory input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, input logic [4:0] RdE, RdM, RdW, output logic FRegWriteM, output logic FStallD, // Stall the decode stage output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FIntResM, output logic FDivBusyE, // Is the divison/sqrt unit busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS // *** folder at same level of src for tests fpu tests // qa.b // u1.52 - u sunsigned, q signed generate if (`F_SUPPORTED | `D_SUPPORTED) begin // control logic signal instantiation logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [1:0] FResSelD, FResSelE, FResSelM; logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage logic [`XLEN-1:0] FSrcXMAligned; logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding) logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) // unpacking signals logic XSgnE, YSgnE, ZSgnE; logic [10:0] XExpE, YExpE, ZExpE; logic [52:0] XManE, YManE, ZManE; logic XNaNE, YNaNE, ZNaNE; logic XSNaNE, YSNaNE, ZSNaNE; logic XDenormE, YDenormE, ZDenormE; logic XZeroE, YZeroE, ZZeroE; logic [10:0] BiasE; logic XInfE, YInfE, ZInfE; logic XExpMaxE; logic XNormE; logic XSgnM, YSgnM, ZSgnM; logic [10:0] XExpM, YExpM, ZExpM; logic [52:0] XManM, YManM, ZManM; logic XNaNM, YNaNM, ZNaNM; logic XSNaNM, YSNaNM, ZSNaNM; logic XZeroM, YZeroM, ZZeroM; logic XInfM, YInfM, ZInfM; // div/sqrt signals logic [63:0] FDivResultM, FDivResultW; logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; logic FDivSqrtDoneE; logic [63:0] DivInput1E, DivInput2E; logic HoldInputs; // keep forwarded inputs arround durring division //fpu signals logic [63:0] FMAResM, FMAResW; logic [4:0] FMAFlgM, FMAFlgW; logic [63:0] ReadResW; // add/cvt signals logic [63:0] FAddResM, FAddResW; logic [4:0] FAddFlgM, FAddFlgW; logic [63:0] CvtResE, CvtResM; logic [4:0] CvtFlgE, CvtFlgM; // cmp signals logic CmpNVE, CmpNVM, CmpNVW; logic [63:0] CmpResE, CmpResM, CmpResW; // fsgn signals logic [63:0] SgnResE, SgnResM; logic SgnNVE, SgnNVM, SgnNVW; logic [63:0] FResM, FResW; logic [4:0] FFlgM, FFlgW; // instantiation of W stage regfile signals logic [63:0] AlignedSrcAM; // classify signals logic [63:0] ClassResE, ClassResM; // 64-bit FPU result logic [63:0] FPUResultW; logic [4:0] FPUFlagsW; //DECODE STAGE // top-level controller for FPU fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); // regfile instantiation fregfile fregfile (clk, reset, FRegWriteW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, FPUResultW, FRD1D, FRD2D, FRD3D); //***************** // D/E pipe registers //***************** flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); //EXECUTION STAGE // Hazard unit for FPU fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); // forwarding muxs mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE); mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); mux2 #(64) fzmulmux(FPreSrcZE, 64'b0, FOpCtrlE[2], FSrcZE); // Force Z to be 0 for multiply instructions unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); // first of two-stage instance of floating-point fused multiply-add unit fma fma (.clk, .reset, .FlushM, .StallM, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, . ZManE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, .FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]), .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); // first and only instance of floating-point divider logic fpdivClk; clockgater fpdivclkg(.E(FDivStartE), .SE(1'b0), .CLK(clk), .ECLK(fpdivClk)); // capture the inputs for div/sqrt flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E), .en(1'b1), .clear(FDivSqrtDoneE), .reset(reset), .clk(HoldInputs)); flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E), .en(1'b1), .clear(FDivSqrtDoneE), .reset(reset), .clk(HoldInputs)); //*** add round to nearest ties to max magnitude fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), .P(~FmtE), .FDivBusyE, .HoldInputs, .OvEn(1'b1), .UnEn(1'b1), .start(FDivStartE), .reset, .clk(fpdivClk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); // .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, // .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, // .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); // assign FDivBusyE = 0; // first of two-stage instance of floating-point add/cvt unit faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); // first and only instance of floating-point comparator fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE); // first and only instance of floating-point sign converter fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE); // first and only instance of floating-point classify unit fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); // output for store instructions assign FWriteDataE = FSrcYE[`XLEN-1:0]; //***************** // E/M pipe registers //***************** flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); // flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM); // flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM); flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); flopenrc #(65) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); flopenrc #(65) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM}); flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM); flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM); flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM, {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); //BEGIN MEMORY STAGE mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); // mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned); mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); //***************** // M/W pipe registers //***************** flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW, {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); //######################################### // BEGIN WRITEBACK STAGE //######################################### mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); end else begin // no F_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; assign FWriteIntM = 0; assign FWriteIntW = 0; assign FWriteDataE = 0; assign FIntResM = 0; assign FDivBusyE = 0; assign IllegalFPUInstrD = 1; assign SetFflagsM = 0; end endgenerate endmodule // fpu