Moved generate statements for optional units into wallypipelinedhart

This commit is contained in:
David Harris 2021-12-19 16:53:41 -08:00
parent 5e1c3e322b
commit 1196e5c191
12 changed files with 424 additions and 467 deletions

View File

@ -44,258 +44,244 @@ module fpu (
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
);
//*** make everything FLEN at some point
//*** add the 128 bit support to the if statement when needed
//*** make new tests for fp using testfloat that include flag checking and all rounding modes
//*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
//*** only fma/mul and fp <-> int convert flags have been tested. test the others.
//*** make everything FLEN at some point
//*** add the 128 bit support to the if statement when needed
//*** make new tests for fp using testfloat that include flag checking and all rounding modes
//*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
//*** only fma/mul and fp <-> int convert flags have been tested. test the others.
// FPU specifics:
// - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// single stored in a double: | 32 1s | single precision value |
// - sets the underflow after rounding
// FPU specifics:
// - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// single stored in a double: | 32 1s | single precision value |
// - sets the underflow after rounding
generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu
// control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding)
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff)
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is normal
logic FmtQ;
logic FOpCtrlQ;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM; // FMA/multiply result
logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] CvtFpResE; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags
logic [63:0] CvtResE; // FP <-> int convert result
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
logic [63:0] ClassResE; // classify result
logic [63:0] CmpResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [63:0] SgnResE; // sign injection result
logic SgnNVE; // sign injection invalid flag (Not Valid)
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [63:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic load_preload; // enable for FF on fpdivsqrt
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
// regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding)
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// DECODE STAGE
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
.a4(RdW), .wd4(FPUResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff)
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is normal
logic FmtQ;
logic FOpCtrlQ;
// D/E pipeline registers
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM; // FMA/multiply result
logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] CvtFpResE; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags
logic [63:0] CvtResE; // FP <-> int convert result
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
logic [63:0] ClassResE; // classify result
logic [63:0] CmpResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [63:0] SgnResE; // sign injection result
logic SgnNVE; // sign injection invalid flag (Not Valid)
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [63:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic load_preload; // enable for FF on fpdivsqrt
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
// EXECUTION STAGE
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0},
{2'b0, {10{1'b1}}, 52'b0},
{FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)},
FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
mux3 #(64) fzmulmux (FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
// unpacking unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpacking unpacking (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// fpdivsqrt using Goldschmidt's iteration
flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}),
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
.reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
.XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// DECODE STAGE
// convert from signle to double and vice versa
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
// compare unit
// - computation is done in one stage
// - writes to FP file durring min/max instructions
// - other comparisons write a 1 or 0 to the integer register
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FSrcXE, .FSrcYE, .FOpCtrlE,
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
.Invalid(CmpNVE), .CmpResE);
// sign injection unit
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
.SgnNVE, .SgnResE);
// classify
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
.XSNaNE, .ClassResE);
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// Convert
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
.CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0];
// Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
// select a result that may be written to the FP register
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
// E/M pipe registers
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
.a4(RdW), .wd4(FPUResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(65) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE},
{FRegWriteM, FResultSelM, FrmM, FmtM});
// BEGIN MEMORY STAGE
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
// M/W pipe registers
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM},
{FRegWriteW, FResultSelW, FmtW});
// BEGIN WRITEBACK STAGE
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register
mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
// D/E pipeline registers
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;
assign FWriteDataE = 0;
assign FIntResM = 0;
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
end
endgenerate
// EXECUTION STAGE
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0},
{2'b0, {10{1'b1}}, 52'b0},
{FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)},
FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
mux3 #(64) fzmulmux (FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
// unpacking unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpacking unpacking (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// fpdivsqrt using Goldschmidt's iteration
flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}),
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
.reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
.XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// convert from signle to double and vice versa
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
// compare unit
// - computation is done in one stage
// - writes to FP file durring min/max instructions
// - other comparisons write a 1 or 0 to the integer register
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FSrcXE, .FSrcYE, .FOpCtrlE,
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
.Invalid(CmpNVE), .CmpResE);
// sign injection unit
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
.SgnNVE, .SgnResE);
// classify
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
.XSNaNE, .ClassResE);
// Convert
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
.CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0];
// Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
// select a result that may be written to the FP register
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
// E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(65) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE},
{FRegWriteM, FResultSelM, FrmM, FmtM});
// BEGIN MEMORY STAGE
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
// M/W pipe registers
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM},
{FRegWriteW, FResultSelW, FmtW});
// BEGIN WRITEBACK STAGE
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register
mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
endmodule // fpu

View File

@ -40,65 +40,50 @@ module muldiv (
input logic StallM, StallW, FlushM, FlushW
);
generate
if (`M_SUPPORTED) begin
logic [`XLEN-1:0] MulDivResultM;
logic [`XLEN-1:0] PrelimResultM;
logic [`XLEN-1:0] QuotM, RemM;
logic [`XLEN*2-1:0] ProdM;
logic [`XLEN-1:0] MulDivResultM;
logic [`XLEN-1:0] PrelimResultM;
logic [`XLEN-1:0] QuotM, RemM;
logic [`XLEN*2-1:0] ProdM;
logic DivE;
logic DivSignedE;
logic W64M;
// Multiplier
mul mul(
.clk, .reset,
.StallM, .FlushM,
// .SrcAE, .SrcBE,
.ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
.Funct3E,
.ProdM
);
logic DivE;
logic DivSignedE;
logic W64M;
// Divide
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
assign DivE = MulDivE & Funct3E[2];
assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM,
.DivSignedE, .W64E, .DivE, .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
// Result multiplexer
always_comb
case (Funct3M)
3'b000: PrelimResultM = ProdM[`XLEN-1:0];
3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b100: PrelimResultM = QuotM;
3'b101: PrelimResultM = QuotM;
3'b110: PrelimResultM = RemM;
3'b111: PrelimResultM = RemM;
endcase
// Handle sign extension for W-type instructions
flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M);
if (`XLEN == 64) begin // RV64 has W-type instructions
assign MulDivResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM;
end else begin // RV32 has no W-type instructions
assign MulDivResultM = PrelimResultM;
end
// Multiplier
mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM);
// Writeback stage pipeline register
// Divide
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
assign DivE = MulDivE & Funct3E[2];
assign DivSignedE = ~Funct3E[0];
intdivrestoring div(.clk, .reset, .StallM, .DivSignedE, .W64E, .DivE,
.ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
// Result multiplexer
always_comb
case (Funct3M)
3'b000: PrelimResultM = ProdM[`XLEN-1:0];
3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];
3'b100: PrelimResultM = QuotM;
3'b101: PrelimResultM = QuotM;
3'b110: PrelimResultM = RemM;
3'b111: PrelimResultM = RemM;
endcase
flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW);
end else begin // no M instructions supported
assign MulDivResultW = 0;
assign DivBusyE = 0;
end
endgenerate
// Handle sign extension for W-type instructions
flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M);
generate
if (`XLEN == 64) begin:resmux // RV64 has W-type instructions
assign MulDivResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM;
end else begin:resmux // RV32 has no W-type instructions
assign MulDivResultM = PrelimResultM;
end
endgenerate
// Writeback stage pipeline register
flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW);
endmodule // muldiv

View File

@ -86,76 +86,48 @@ module csr #(parameter
logic IllegalCSRCAccessM, IllegalCSRMAccessM, IllegalCSRSAccessM, IllegalCSRUAccessM, IllegalCSRNAccessM, InsufficientCSRPrivilegeM;
logic IllegalCSRMWriteReadonlyM;
generate
if (`ZICSR_SUPPORTED) begin
// modify CSRs
always_comb begin
// Choose either rs1 or uimm[4:0] as source
CSRSrcM = InstrM[14] ? {{(`XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM;
// Compute AND/OR modification
CSRRWM = CSRSrcM;
CSRRSM = CSRReadValM | CSRSrcM;
CSRRCM = CSRReadValM & ~CSRSrcM;
case (InstrM[13:12])
2'b01: CSRWriteValM = CSRRWM;
2'b10: CSRWriteValM = CSRRSM;
2'b11: CSRWriteValM = CSRRCM;
default: CSRWriteValM = CSRReadValM;
endcase
end
// modify CSRs
always_comb begin
// Choose either rs1 or uimm[4:0] as source
CSRSrcM = InstrM[14] ? {{(`XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM;
// Compute AND/OR modification
CSRRWM = CSRSrcM;
CSRRSM = CSRReadValM | CSRSrcM;
CSRRCM = CSRReadValM & ~CSRSrcM;
case (InstrM[13:12])
2'b01: CSRWriteValM = CSRRWM;
2'b10: CSRWriteValM = CSRRSM;
2'b11: CSRWriteValM = CSRRCM;
default: CSRWriteValM = CSRReadValM;
endcase
end
// write CSRs
assign CSRAdrM = InstrM[31:20];
assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM;
assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment
assign NextCauseM = TrapM ? CauseM : CSRWriteValM;
assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM;
assign CSRMWriteM = CSRWriteM && (PrivilegeModeW == `M_MODE);
assign CSRSWriteM = CSRWriteM && (|PrivilegeModeW);
assign CSRUWriteM = CSRWriteM;
// write CSRs
assign CSRAdrM = InstrM[31:20];
assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM;
assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment
assign NextCauseM = TrapM ? CauseM : CSRWriteValM;
assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM;
assign CSRMWriteM = CSRWriteM && (PrivilegeModeW == `M_MODE);
assign CSRSWriteM = CSRWriteM && (|PrivilegeModeW);
assign CSRUWriteM = CSRWriteM;
csri csri(.*);
csrsr csrsr(.*);
csrc counters(.*);
csrm csrm(.*); // Machine Mode CSRs
csrs csrs(.*);
csrn csrn(.CSRNWriteM(CSRUWriteM), .*); // User Mode Exception Registers
csru csru(.*); // Floating Point Flags are part of User MOde
csri csri(.*);
csrsr csrsr(.*);
csrc counters(.*);
csrm csrm(.*); // Machine Mode CSRs
csrs csrs(.*);
csrn csrn(.CSRNWriteM(CSRUWriteM), .*); // User Mode Exception Registers
csru csru(.*); // Floating Point Flags are part of User MOde
// merge CSR Reads
assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM;
// *** add W stall 2/22/21 dh to try fixing memory stalls
// floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRReadValW);
flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW);
// merge CSR Reads
assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM;
flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW);
// merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient
assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) ||
(CSRAdrM[9:8] == 2'b01 && PrivilegeModeW == `U_MODE);
assign IllegalCSRAccessM = ((IllegalCSRCAccessM && IllegalCSRMAccessM &&
IllegalCSRSAccessM && IllegalCSRUAccessM && IllegalCSRNAccessM ||
InsufficientCSRPrivilegeM) && CSRReadM) || IllegalCSRMWriteReadonlyM;
end else begin // CSRs not implemented
assign STATUS_MPP = 2'b11;
assign STATUS_SPP = 2'b0;
assign STATUS_TSR = 0;
assign MEPC_REGW = 0;
assign SEPC_REGW = 0;
assign UEPC_REGW = 0;
assign UTVEC_REGW = 0;
assign STVEC_REGW = 0;
assign MTVEC_REGW = 0;
assign MEDELEG_REGW = 0;
assign MIDELEG_REGW = 0;
assign SEDELEG_REGW = 0;
assign SIDELEG_REGW = 0;
assign SATP_REGW = 0;
assign MIP_REGW = 0;
assign MIE_REGW = 0;
assign STATUS_MIE = 0;
assign STATUS_SIE = 0;
assign FRM_REGW = 0;
assign CSRReadValM = 0;
assign IllegalCSRAccessM = CSRReadM;
end
endgenerate
// merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient
assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) ||
(CSRAdrM[9:8] == 2'b01 && PrivilegeModeW == `U_MODE);
assign IllegalCSRAccessM = ((IllegalCSRCAccessM && IllegalCSRMAccessM &&
IllegalCSRSAccessM && IllegalCSRUAccessM && IllegalCSRNAccessM ||
InsufficientCSRPrivilegeM) && CSRReadM) || IllegalCSRMWriteReadonlyM;
endmodule

View File

@ -239,8 +239,6 @@ module privileged (
.ExceptionM,
.PendingInterruptM,
.PrivilegedNextPCM, .CauseM, .NextFaultMtvalM);
endmodule

View File

@ -198,7 +198,6 @@ module wallypipelinedhart (
); // instruction fetch unit: PC, branch prediction, instruction cache
ieu ieu(
.clk, .reset,
@ -276,7 +275,7 @@ module wallypipelinedhart (
.LSUStall); // change to LSUStall
// *** Ross: please make EBU conditional when only supporting internal memories
ahblite ebu(// IFU connections
.clk, .reset,
@ -295,22 +294,7 @@ module wallypipelinedhart (
.HWRITED);
muldiv mdu(
.clk, .reset,
// Execute Stage interface
// .SrcAE, .SrcBE,
.ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
.Funct3E, .Funct3M,
.MulDivE, .W64E,
// Writeback stage
.MulDivResultW,
// Divide Done
.DivBusyE,
// hazards
.StallM, .StallW, .FlushM, .FlushW
); // multiply and divide unit
hazard hzu(
hazard hzu(
.BPPredWrongE, .CSRWritePendingDEM, .RetM, .TrapM,
.LoadStallD, .StoreStallD, .MulDivStallD, .CSRRdStallD,
.LSUStall, .ICacheStallF,
@ -323,57 +307,89 @@ module wallypipelinedhart (
.FlushF, .FlushD, .FlushE, .FlushM, .FlushW
); // global stall and flush control
// Priveleged block operates in M and W stages, handling CSRs and exceptions
privileged priv(
.clk, .reset,
.FlushD, .FlushE, .FlushM, .FlushW,
.StallD, .StallE, .StallM, .StallW,
.CSRReadM, .CSRWriteM, .SrcAM, .PCM,
.InstrM, .CSRReadValW, .PrivilegedNextPCM,
.RetM, .TrapM,
.ITLBFlushF, .DTLBFlushM,
.InstrValidM, .CommittedM,
.FRegWriteM, .LoadStallD,
.BPPredDirWrongM, .BTBPredPCWrongM,
.RASPredPCWrongM, .BPPredClassNonCFIWrongM,
.InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM,
.ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM,
.WalkerInstrPageFaultF, .WalkerLoadPageFaultM, .WalkerStorePageFaultM,
.InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD,
.LoadMisalignedFaultM, .StoreMisalignedFaultM,
.TimerIntM, .ExtIntM, .SwIntM,
.MTIME_CLINT, .MTIMECMP_CLINT,
.InstrMisalignedAdrM, .MemAdrM,
.SetFflagsM,
// Trap signals from pmp/pma in mmu
// *** do these need to be split up into one for dmem and one for ifu?
// instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem?
.InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM,
.ExceptionM, .PendingInterruptM, .IllegalFPUInstrE,
.PrivilegeModeW, .SATP_REGW,
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.FRM_REGW,.BreakpointFaultM, .EcallFaultM
);
generate
if (`ZICSR_SUPPORTED) begin:priv
privileged priv(
.clk, .reset,
.FlushD, .FlushE, .FlushM, .FlushW,
.StallD, .StallE, .StallM, .StallW,
.CSRReadM, .CSRWriteM, .SrcAM, .PCM,
.InstrM, .CSRReadValW, .PrivilegedNextPCM,
.RetM, .TrapM,
.ITLBFlushF, .DTLBFlushM,
.InstrValidM, .CommittedM,
.FRegWriteM, .LoadStallD,
.BPPredDirWrongM, .BTBPredPCWrongM,
.RASPredPCWrongM, .BPPredClassNonCFIWrongM,
.InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM,
.ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM,
.WalkerInstrPageFaultF, .WalkerLoadPageFaultM, .WalkerStorePageFaultM,
.InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD,
.LoadMisalignedFaultM, .StoreMisalignedFaultM,
.TimerIntM, .ExtIntM, .SwIntM,
.MTIME_CLINT, .MTIMECMP_CLINT,
.InstrMisalignedAdrM, .MemAdrM,
.SetFflagsM,
// Trap signals from pmp/pma in mmu
// *** do these need to be split up into one for dmem and one for ifu?
// instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem?
.InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM,
.ExceptionM, .PendingInterruptM, .IllegalFPUInstrE,
.PrivilegeModeW, .SATP_REGW,
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.FRM_REGW,.BreakpointFaultM, .EcallFaultM
);
end else begin
assign CSRReadValW = 0;
assign PrivilegedNextPCM = 0;
assign RetM = 0;
assign TrapM = 0;
assign ITLBFlushF = 0;
assign DTLBFlushM = 0;
end
if (`M_SUPPORTED) begin:mdu
muldiv mdu(
.clk, .reset,
.ForwardedSrcAE, .ForwardedSrcBE,
.Funct3E, .Funct3M, .MulDivE, .W64E,
.MulDivResultW, .DivBusyE,
.StallM, .StallW, .FlushM, .FlushW
);
end else begin // no M instructions supported
assign MulDivResultW = 0;
assign DivBusyE = 0;
end
fpu fpu(
.clk, .reset,
.FRM_REGW, // Rounding mode from CSR
.InstrD, // instruction from IFU
.ReadDataW,// Read data from memory
.ForwardedSrcAE, // Integer input being processed (from IEU)
.StallE, .StallM, .StallW, // stall signals from HZU
.FlushE, .FlushM, .FlushW, // flush signals from HZU
.RdM, .RdW, // which FP register to write to (from IEU)
.FRegWriteM, // FP register write enable
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory
.FIntResM, // data to be written to integer register
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit)
); // floating point unit
if (`F_SUPPORTED) begin:fpu
fpu fpu(
.clk, .reset,
.FRM_REGW, // Rounding mode from CSR
.InstrD, // instruction from IFU
.ReadDataW,// Read data from memory
.ForwardedSrcAE, // Integer input being processed (from IEU)
.StallE, .StallM, .StallW, // stall signals from HZU
.FlushE, .FlushM, .FlushW, // flush signals from HZU
.RdM, .RdW, // which FP register to write to (from IEU)
.FRegWriteM, // FP register write enable
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory
.FIntResM, // data to be written to integer register
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit)
); // floating point unit
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0;
assign FWriteIntE = 0;
assign FWriteDataE = 0;
assign FIntResM = 0;
assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0;
end
endgenerate
// Priveleged block operates in M and W stages, handling CSRs and exceptions
endmodule

View File

@ -174,7 +174,7 @@ module testbench();
// Useful Aliases
`define RF dut.hart.ieu.dp.regf.rf
`define PC dut.hart.ifu.pcreg.q
`define CSR_BASE dut.hart.priv.csr.genblk1
`define CSR_BASE dut.hart.priv.priv.csr
`define HPMCOUNTER `CSR_BASE.counters.genblk1.HPMCOUNTER_REGW
`define PMP_BASE `CSR_BASE.csrm.genblk4
`define PMPCFG genblk2.PMPCFGreg.q
@ -210,8 +210,8 @@ module testbench();
`define STATUS_MIE `CSR_BASE.csrsr.STATUS_MIE
`define STATUS_SIE `CSR_BASE.csrsr.STATUS_SIE
`define STATUS_UIE `CSR_BASE.csrsr.STATUS_UIE
`define PRIV dut.hart.priv.privmodereg.q
`define INSTRET dut.hart.priv.csr.genblk1.counters.genblk1.genblk2.INSTRETreg.q
`define PRIV dut.hart.priv.priv.privmodereg.q
`define INSTRET dut.hart.priv.priv.csr.counters.genblk1.genblk2.INSTRETreg.q
// Common Macros
`define checkCSR(CSR) \
begin \
@ -308,9 +308,9 @@ module testbench();
integer ramFile;
integer readResult;
initial begin
force dut.hart.priv.SwIntM = 0;
force dut.hart.priv.TimerIntM = 0;
force dut.hart.priv.ExtIntM = 0;
force dut.hart.priv.priv.SwIntM = 0;
force dut.hart.priv.priv.TimerIntM = 0;
force dut.hart.priv.priv.ExtIntM = 0;
$readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootrom.bootrom.RAM, 'h1000 >> 3);
$readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
$readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.mem);
@ -365,7 +365,7 @@ module testbench();
// on the next falling edge the expected state is compared to the wally state.
// step 0: read the expected state
assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.trap.InstrPageFaultM & ~dut.hart.priv.trap.InterruptM & ~dut.hart.StallM;
assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.priv.trap.InstrPageFaultM & ~dut.hart.priv.priv.trap.InterruptM & ~dut.hart.StallM;
`define SCAN_NEW_INSTR_FROM_TRACE(STAGE) \
// always check PC, instruction bits \
if (checkInstrM) begin \
@ -479,7 +479,7 @@ module testbench();
end else begin // update MIP immediately
$display("%tns: Updating MIP to %x",$time,NextMIPexpected);
MIPexpected = NextMIPexpected;
force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected;
force dut.hart.priv.priv.csr.csri.MIP_REGW = MIPexpected;
end
// $display("%tn: ExpectedCSRArrayM = %p",$time,ExpectedCSRArrayM);
// $display("%tn: ExpectedCSRArrayValueM = %p",$time,ExpectedCSRArrayValueM);
@ -491,11 +491,11 @@ module testbench();
// $display("%tn: ExpectedCSRArrayValueM[NumCSRM] %x",$time,ExpectedCSRArrayValueM[NumCSRM]);
end
if(RequestDelayedMIP & checkInstrM) begin
$display("%tns: Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.csr.genblk1.csrm.MEPC_REGW);
$display("%tns: Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.priv.csr.csrm.MEPC_REGW);
$display("%tns: Updating MIP to %x",$time,NextMIPexpected);
MIPexpected = NextMIPexpected;
force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected;
$display("%tns: Finished Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.csr.genblk1.csrm.MEPC_REGW);
force dut.hart.priv.priv.csr.csri.MIP_REGW = MIPexpected;
$display("%tns: Finished Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.priv.csr.csrm.MEPC_REGW);
RequestDelayedMIP = 0;
end
end
@ -576,7 +576,7 @@ module testbench();
`checkEQ("PCW",PCW,ExpectedPCW)
//`checkEQ("InstrW",InstrW,ExpectedInstrW) <-- not viable because of
// compressed to uncompressed conversion
`checkEQ("Instr Count",dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW,InstrCountW)
`checkEQ("Instr Count",dut.hart.priv.priv.csr.counters.genblk1.INSTRET_REGW,InstrCountW)
#2; // delay 2 ns.
if(`DEBUG_TRACE >= 5) begin
$display("%tns, %d instrs: Reg Write Address %02d ? expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW);
@ -601,19 +601,19 @@ module testbench();
// check csr
for(NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) begin
case(ExpectedCSRArrayW[NumCSRPostWIndex])
"mhartid": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW)
"mstatus": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW)
"mtvec": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW)
"mip": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIP_REGW)
"mie": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIE_REGW)
"mideleg": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW)
"medeleg": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW)
"mepc": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MEPC_REGW)
"mtval": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW)
"sepc": `checkCSR(dut.hart.priv.csr.genblk1.csrs.SEPC_REGW)
"scause": `checkCSR(dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW)
"stvec": `checkCSR(dut.hart.priv.csr.genblk1.csrs.STVEC_REGW)
"stval": `checkCSR(dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW)
"mhartid": `checkCSR(dut.hart.priv.priv.csr.csrm.MHARTID_REGW)
"mstatus": `checkCSR(dut.hart.priv.priv.csr.csrm.MSTATUS_REGW)
"mtvec": `checkCSR(dut.hart.priv.priv.csr.csrm.MTVEC_REGW)
"mip": `checkCSR(dut.hart.priv.priv.csr.csrm.MIP_REGW)
"mie": `checkCSR(dut.hart.priv.priv.csr.csrm.MIE_REGW)
"mideleg": `checkCSR(dut.hart.priv.priv.csr.csrm.MIDELEG_REGW)
"medeleg": `checkCSR(dut.hart.priv.priv.csr.csrm.MEDELEG_REGW)
"mepc": `checkCSR(dut.hart.priv.priv.csr.csrm.MEPC_REGW)
"mtval": `checkCSR(dut.hart.priv.priv.csr.csrm.MTVAL_REGW)
"sepc": `checkCSR(dut.hart.priv.priv.csr.csrs.SEPC_REGW)
"scause": `checkCSR(dut.hart.priv.priv.csr.csrs.genblk1.SCAUSE_REGW)
"stvec": `checkCSR(dut.hart.priv.priv.csr.csrs.STVEC_REGW)
"stval": `checkCSR(dut.hart.priv.priv.csr.csrs.genblk1.STVAL_REGW)
endcase
end
if (fault == 1) begin
@ -667,7 +667,7 @@ module testbench();
begin
int i;
// Grab the SATP register from privileged unit
SATP = dut.hart.priv.csr.SATP_REGW;
SATP = dut.hart.priv.priv.csr.SATP_REGW;
// Split the virtual address into page number segments and offset
VPN[2] = adrIn[38:30];
VPN[1] = adrIn[29:21];
@ -677,7 +677,7 @@ module testbench();
SvMode = SATP[63];
// Only perform translation if translation is on and the processor is not
// in machine mode
if (SvMode && (dut.hart.priv.PrivilegeModeW != `M_MODE)) begin
if (SvMode && (dut.hart.priv.priv.PrivilegeModeW != `M_MODE)) begin
BaseAdr = SATP[43:0] << 12;
for (i = 2; i >= 0; i--) begin
PAdr = BaseAdr + (VPN[i] << 3);

View File

@ -287,7 +287,7 @@ logic [3:0] dummy;
// Termination condition
// terminate on a specific ECALL for Imperas tests, or on a jump to self infinite loop for RISC-V Arch tests
assign DCacheFlushStart = dut.hart.priv.EcallFaultM &&
assign DCacheFlushStart = dut.hart.priv.priv.EcallFaultM &&
(dut.hart.ieu.dp.regf.rf[3] == 1 ||
(dut.hart.ieu.dp.regf.we3 &&
dut.hart.ieu.dp.regf.a3 == 3 &&
@ -318,7 +318,7 @@ module riscvassertions;
initial begin
assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
assert (`DIV_BITSPERCYCLE == 1 || `DIV_BITSPERCYCLE==2 || `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double without supporting float");
assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
assert (`XLEN == 64 || ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 || `MEM_DCACHE == 0 || `MEM_VIRTMEM == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_BLOCKLENINBITS >= 128 || `MEM_DCACHE == 0) else $error("DCACHE_BLOCKLENINBITS must be at least 128 when caches are enabled");