cvw/wally-pipelined/src/fpu/fpu.sv

304 lines
17 KiB
Systemverilog
Raw Normal View History

2021-05-24 13:28:16 +00:00
///////////////////////////////////////////
//
// Written: Katherine Parry, James Stine, Brett Mathis
2021-06-23 20:42:40 +00:00
// Modified: 6/23/2021
2021-05-24 13:28:16 +00:00
//
// Purpose: FPU
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
2021-04-08 18:03:21 +00:00
`include "wally-config.vh"
2021-04-04 18:09:13 +00:00
module fpu (
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU
input logic [`XLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU)
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU)
output logic FRegWriteM, // FP register write enable
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM // FMA flags (to privileged unit)
2021-07-24 18:59:57 +00:00
);
//*** make everything FLEN at some point
//*** add the 128 bit support to the if statement when needed
//*** make new tests for fp using testfloat that include flag checking and all rounding modes
//*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
//*** only fma/mul and fp <-> int convert flags have been tested. test the others.
// FPU specifics:
// - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// single stored in a double: | 32 1s | single precision value |
// - sets the underflow after rounding
2021-07-29 03:49:21 +00:00
generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu
2021-07-24 18:59:57 +00:00
// control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2)
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is normal
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result
logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
logic [63:0] ClassResE, ClassResM; // classify result
logic [63:0] CmpResE, CmpResM; // compare result
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
logic [63:0] SgnResE, SgnResM; // sign injection result
logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid)
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [63:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic FDivClk; // clock for divide/squareroot unit
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
// DECODE STAGE
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
.a4(RdW), .wd4(FPUResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// D/E pipeline registers
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
// EXECUTION STAGE
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0},
{2'b0, {10{1'b1}}, 52'b0},
{FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)},
FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
mux3 #(64) fzmulmux (FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
// unpacking unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpacking unpacking (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// fpdivsqrt using Goldschmidt's iteration
floprc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
.clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
floprc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
.clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
floprc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}),
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}),
.clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk(clk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
.XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ,
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// convert from signle to double and vice versa
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
// compare unit
// - computation is done in one stage
// - writes to FP file durring min/max instructions
// - other comparisons write a 1 or 0 to the integer register
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FSrcXE, .FSrcYE, .FOpCtrlE,
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
.Invalid(CmpNVE), .CmpResE);
// sign injection unit
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
.SgnNVE, .SgnResE);
// classify
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
.XSNaNE, .ClassResE);
// Convert
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
.CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0];
// Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
// select a result that may be written to the FP register
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
// E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(65) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(11) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
// BEGIN MEMORY STAGE
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
2021-07-24 18:59:57 +00:00
// M/W pipe registers
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
// BEGIN WRITEBACK STAGE
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register
mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
2021-07-24 18:59:57 +00:00
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;
assign FWriteIntM = 0;
assign FWriteIntW = 0;
assign FWriteDataE = 0;
assign FIntResM = 0;
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
2021-07-24 18:59:57 +00:00
end
endgenerate
2021-06-01 19:45:32 +00:00
endmodule // fpu