From 3c3bfd055e589e348e57554069d82855e453b25e Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 19 Dec 2021 16:53:41 -0800 Subject: [PATCH] Moved generate statements for optional units into wallypipelinedhart --- wally-pipelined/src/fpu/fpu.sv | 474 +++++++++--------- wally-pipelined/src/muldiv/muldiv.sv | 93 ++-- wally-pipelined/src/privileged/csr.sv | 108 ++-- wally-pipelined/src/privileged/privileged.sv | 2 - .../src/wally/wallypipelinedhart.sv | 156 +++--- .../sdc/tb => testbench/sdc}/ram2sdLoad.py | 0 .../sdc/tb => testbench/sdc}/ramdisk2.hex | 0 .../{src/sdc/tb => testbench/sdc}/run_tb.do | 0 .../sdc/tb => testbench/sdc}/sd_top_tb.sv | 0 .../{src/sdc/tb => testbench/sdc}/wave.do | 0 wally-pipelined/testbench/testbench-linux.sv | 54 +- wally-pipelined/testbench/testbench.sv | 4 +- 12 files changed, 424 insertions(+), 467 deletions(-) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/ram2sdLoad.py (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/ramdisk2.hex (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/run_tb.do (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/sd_top_tb.sv (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/wave.do (100%) diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 0ba61c6c..7b9680ed 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -44,258 +44,244 @@ module fpu ( output logic [4:0] SetFflagsM // FPU flags (to privileged unit) ); - //*** make everything FLEN at some point - //*** add the 128 bit support to the if statement when needed - //*** make new tests for fp using testfloat that include flag checking and all rounding modes - //*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec - //*** only fma/mul and fp <-> int convert flags have been tested. test the others. + //*** make everything FLEN at some point + //*** add the 128 bit support to the if statement when needed + //*** make new tests for fp using testfloat that include flag checking and all rounding modes + //*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec + //*** only fma/mul and fp <-> int convert flags have been tested. test the others. - // FPU specifics: - // - uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - // single stored in a double: | 32 1s | single precision value | - // - sets the underflow after rounding + // FPU specifics: + // - uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + // single stored in a double: | 32 1s | single precision value | + // - sets the underflow after rounding - generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu + // control signals + logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division or squareroot + logic FWriteIntD; // Write to integer register + logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals + logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register + logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register + logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component + logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage + logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input - // control signals - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division or squareroot - logic FWriteIntD; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register - logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register - logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component - logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage - logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input - - // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding) - logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) - - // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage - logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic [10:0] BiasE; // bias based on precision (single=7f double=3ff) - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XNaNQ, YNaNQ; // is the input a NaN - divide - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, YDenormE, ZDenormE; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XZeroQ, YZeroQ; // is the input zero - divide - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XInfQ, YInfQ; // is the input infinity - divide - logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE; // is normal - logic FmtQ; - logic FOpCtrlQ; - - // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM; // divide/squareroot flags - logic [63:0] FMAResM, FMAResW; // FMA/multiply result - logic [4:0] FMAFlgM; // FMA/multiply result - logic [63:0] ReadResW; // read result (load instruction) - logic [63:0] CvtFpResE; // add/FP -> FP convert result - logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags - logic [63:0] CvtResE; // FP <-> int convert result - logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this - logic [63:0] ClassResE; // classify result - logic [63:0] CmpResE; // compare result - logic CmpNVE; // compare invalid flag (Not Valid) - logic [63:0] SgnResE; // sign injection result - logic SgnNVE; // sign injection invalid flag (Not Valid) - logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage - logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage - logic [`XLEN-1:0] FIntResE; - logic [63:0] FPUResultW; // final FP result being written to the FP register - // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic load_preload; // enable for FF on fpdivsqrt - logic [63:0] AlignedSrcAE; // align SrcA to the floating point format + // regfile signals + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding) + logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) - // DECODE STAGE - - // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, - .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // FP register file - fregfile fregfile (.clk, .reset, .we4(FRegWriteW), - .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), - .a4(RdW), .wd4(FPUResultW), - .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); + // unpacking signals + logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage + logic XSgnM, YSgnM; // input's sign - memory stage + logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage + logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage + logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage + logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage + logic [10:0] BiasE; // bias based on precision (single=7f double=3ff) + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XNaNQ, YNaNQ; // is the input a NaN - divide + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XDenormE, YDenormE, ZDenormE; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XZeroQ, YZeroQ; // is the input zero - divide + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XInfQ, YInfQ; // is the input infinity - divide + logic XExpMaxE; // is the exponent all ones (max value) + logic XNormE; // is normal + logic FmtQ; + logic FOpCtrlQ; - // D/E pipeline registers - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); + // result and flag signals + logic [63:0] FDivResM, FDivResW; // divide/squareroot result + logic [4:0] FDivFlgM; // divide/squareroot flags + logic [63:0] FMAResM, FMAResW; // FMA/multiply result + logic [4:0] FMAFlgM; // FMA/multiply result + logic [63:0] ReadResW; // read result (load instruction) + logic [63:0] CvtFpResE; // add/FP -> FP convert result + logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags + logic [63:0] CvtResE; // FP <-> int convert result + logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this + logic [63:0] ClassResE; // classify result + logic [63:0] CmpResE; // compare result + logic CmpNVE; // compare invalid flag (Not Valid) + logic [63:0] SgnResE; // sign injection result + logic SgnNVE; // sign injection invalid flag (Not Valid) + logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage + logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage + logic [`XLEN-1:0] FIntResE; + logic [63:0] FPUResultW; // final FP result being written to the FP register + // other signals + logic FDivSqrtDoneE; // is divide done + logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit + logic load_preload; // enable for FF on fpdivsqrt + logic [63:0] AlignedSrcAE; // align SrcA to the floating point format - // EXECUTION STAGE - // Hazard unit for FPU - // - determines if any forwarding or stalls are needed - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, - .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); - - // forwarding muxs - mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); - mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); - mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); - mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, - {2'b0, {10{1'b1}}, 52'b0}, - {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)}, - FSrcYE); // Force Z to be 0 for multiply instructions - // Force Z to be 0 for multiply instructions - mux3 #(64) fzmulmux (FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); - - // unpacking unit - // - splits FP inputs into their various parts - // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpacking unpacking (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); - - // FMA - // - two stage FMA - // - execute stage - multiplication and addend shifting - // - memory stage - addition and rounding - // - handles FMA and multiply instructions - fma fma (.clk, .reset, .FlushM, .StallM, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, - .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, - .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, - .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - .FOpCtrlE, - .FmtE, .FmtM, .FrmM, - .FMAFlgM, .FMAResM); - - // fpdivsqrt using Goldschmidt's iteration - flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), - .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + // DECODE STAGE - // convert from signle to double and vice versa - cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); - - // compare unit - // - computation is done in one stage - // - writes to FP file durring min/max instructions - // - other comparisons write a 1 or 0 to the integer register - fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), - .FSrcXE, .FSrcYE, .FOpCtrlE, - .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, - .Invalid(CmpNVE), .CmpResE); - - // sign injection unit - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, - .SgnNVE, .SgnResE); - - // classify - fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, - .XSNaNE, .ClassResE); + // calculate FP control signals + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, + .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - // Convert - fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, - .CvtResE, .CvtFlgE); - - // data to be stored in memory - to IEU - // - FP uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - assign FWriteDataE = FSrcYE[`XLEN-1:0]; - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); - - // select a result that may be written to the FP register - mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); - mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); - - // select the result that may be written to the integer register - to IEU - mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], - CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); - - // E/M pipe registers + // FP register file + fregfile fregfile (.clk, .reset, .we4(FRegWriteW), + .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), + .a4(RdW), .wd4(FPUResultW), + .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - flopenrc #(65) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); - flopenrc #(65) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); - flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); - flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM, - {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, - {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); - flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM); - flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); - flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FrmE, FmtE}, - {FRegWriteM, FResultSelM, FrmM, FmtM}); - - // BEGIN MEMORY STAGE - - // FPU flag selection - to privileged - mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM); - - // M/W pipe registers - flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); - flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, FmtM}, - {FRegWriteW, FResultSelW, FmtW}); - - // BEGIN WRITEBACK STAGE - - // put ReadData into NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - // - for load instruction - mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); - - // select the result to be written to the FP register - mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); + // D/E pipeline registers + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); - end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low - assign FStallD = 0; - assign FWriteIntE = 0; - assign FWriteDataE = 0; - assign FIntResM = 0; - assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; - assign SetFflagsM = 0; - end - endgenerate - + // EXECUTION STAGE + // Hazard unit for FPU + // - determines if any forwarding or stalls are needed + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, + .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); + + // forwarding muxs + mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); + mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); + mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); + mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, + {2'b0, {10{1'b1}}, 52'b0}, + {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)}, + FSrcYE); // Force Z to be 0 for multiply instructions + // Force Z to be 0 for multiply instructions + mux3 #(64) fzmulmux (FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); + + // unpacking unit + // - splits FP inputs into their various parts + // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) + unpacking unpacking (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + + // FMA + // - two stage FMA + // - execute stage - multiplication and addend shifting + // - memory stage - addition and rounding + // - handles FMA and multiply instructions + fma fma (.clk, .reset, .FlushM, .StallM, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, + .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, + .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, + .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + .FOpCtrlE, + .FmtE, .FmtM, .FrmM, + .FMAFlgM, .FMAResM); + + // fpdivsqrt using Goldschmidt's iteration + flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), + .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), + .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, + .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + + // convert from signle to double and vice versa + cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); + + // compare unit + // - computation is done in one stage + // - writes to FP file durring min/max instructions + // - other comparisons write a 1 or 0 to the integer register + fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), + .FSrcXE, .FSrcYE, .FOpCtrlE, + .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, + .Invalid(CmpNVE), .CmpResE); + + // sign injection unit + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, + .SgnNVE, .SgnResE); + + // classify + fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, + .XSNaNE, .ClassResE); + + // Convert + fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, + .CvtResE, .CvtFlgE); + + // data to be stored in memory - to IEU + // - FP uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); + + // select a result that may be written to the FP register + mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); + mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); + + // select the result that may be written to the integer register - to IEU + mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], + CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); + + // E/M pipe registers + + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); + flopenrc #(65) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); + flopenrc #(65) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); + flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); + flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM, + {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); + flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM); + flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); + flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FrmE, FmtE}, + {FRegWriteM, FResultSelM, FrmM, FmtM}); + + // BEGIN MEMORY STAGE + + // FPU flag selection - to privileged + mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM); + + // M/W pipe registers + flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); + flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResultSelM, FmtM}, + {FRegWriteW, FResultSelW, FmtW}); + + // BEGIN WRITEBACK STAGE + + // put ReadData into NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + // - for load instruction + mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); + + // select the result to be written to the FP register + mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); endmodule // fpu diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index b3bf4e83..a1e76616 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -40,65 +40,50 @@ module muldiv ( input logic StallM, StallW, FlushM, FlushW ); - generate - if (`M_SUPPORTED) begin - logic [`XLEN-1:0] MulDivResultM; - logic [`XLEN-1:0] PrelimResultM; - logic [`XLEN-1:0] QuotM, RemM; - logic [`XLEN*2-1:0] ProdM; + logic [`XLEN-1:0] MulDivResultM; + logic [`XLEN-1:0] PrelimResultM; + logic [`XLEN-1:0] QuotM, RemM; + logic [`XLEN*2-1:0] ProdM; - logic DivE; - logic DivSignedE; - logic W64M; - - // Multiplier - mul mul( - .clk, .reset, - .StallM, .FlushM, - // .SrcAE, .SrcBE, - .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - .Funct3E, - .ProdM - ); + logic DivE; + logic DivSignedE; + logic W64M; - // Divide - // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign DivE = MulDivE & Funct3E[2]; - assign DivSignedE = ~Funct3E[0]; - intdivrestoring div(.clk, .reset, .StallM, - .DivSignedE, .W64E, .DivE, .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); - - // Result multiplexer - always_comb - case (Funct3M) - 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; - 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b100: PrelimResultM = QuotM; - 3'b101: PrelimResultM = QuotM; - 3'b110: PrelimResultM = RemM; - 3'b111: PrelimResultM = RemM; - endcase - - // Handle sign extension for W-type instructions - flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M); - if (`XLEN == 64) begin // RV64 has W-type instructions - assign MulDivResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; - end else begin // RV32 has no W-type instructions - assign MulDivResultM = PrelimResultM; - end + // Multiplier + mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM); - // Writeback stage pipeline register + // Divide + // Start a divide when a new division instruction is received and the divider isn't already busy or finishing + assign DivE = MulDivE & Funct3E[2]; + assign DivSignedE = ~Funct3E[0]; + intdivrestoring div(.clk, .reset, .StallM, .DivSignedE, .W64E, .DivE, + .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); + + // Result multiplexer + always_comb + case (Funct3M) + 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; + 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b100: PrelimResultM = QuotM; + 3'b101: PrelimResultM = QuotM; + 3'b110: PrelimResultM = RemM; + 3'b111: PrelimResultM = RemM; + endcase - flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); - - end else begin // no M instructions supported - assign MulDivResultW = 0; - assign DivBusyE = 0; - end - endgenerate + // Handle sign extension for W-type instructions + flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M); + generate + if (`XLEN == 64) begin:resmux // RV64 has W-type instructions + assign MulDivResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; + end else begin:resmux // RV32 has no W-type instructions + assign MulDivResultM = PrelimResultM; + end + endgenerate + // Writeback stage pipeline register + flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); endmodule // muldiv diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 440045d3..63d0a583 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -86,76 +86,48 @@ module csr #(parameter logic IllegalCSRCAccessM, IllegalCSRMAccessM, IllegalCSRSAccessM, IllegalCSRUAccessM, IllegalCSRNAccessM, InsufficientCSRPrivilegeM; logic IllegalCSRMWriteReadonlyM; - generate - if (`ZICSR_SUPPORTED) begin - // modify CSRs - always_comb begin - // Choose either rs1 or uimm[4:0] as source - CSRSrcM = InstrM[14] ? {{(`XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM; - // Compute AND/OR modification - CSRRWM = CSRSrcM; - CSRRSM = CSRReadValM | CSRSrcM; - CSRRCM = CSRReadValM & ~CSRSrcM; - case (InstrM[13:12]) - 2'b01: CSRWriteValM = CSRRWM; - 2'b10: CSRWriteValM = CSRRSM; - 2'b11: CSRWriteValM = CSRRCM; - default: CSRWriteValM = CSRReadValM; - endcase - end + // modify CSRs + always_comb begin + // Choose either rs1 or uimm[4:0] as source + CSRSrcM = InstrM[14] ? {{(`XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM; + // Compute AND/OR modification + CSRRWM = CSRSrcM; + CSRRSM = CSRReadValM | CSRSrcM; + CSRRCM = CSRReadValM & ~CSRSrcM; + case (InstrM[13:12]) + 2'b01: CSRWriteValM = CSRRWM; + 2'b10: CSRWriteValM = CSRRSM; + 2'b11: CSRWriteValM = CSRRCM; + default: CSRWriteValM = CSRReadValM; + endcase + end - // write CSRs - assign CSRAdrM = InstrM[31:20]; - assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM; - assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment - assign NextCauseM = TrapM ? CauseM : CSRWriteValM; - assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM; - assign CSRMWriteM = CSRWriteM && (PrivilegeModeW == `M_MODE); - assign CSRSWriteM = CSRWriteM && (|PrivilegeModeW); - assign CSRUWriteM = CSRWriteM; + // write CSRs + assign CSRAdrM = InstrM[31:20]; + assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM; + assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment + assign NextCauseM = TrapM ? CauseM : CSRWriteValM; + assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM; + assign CSRMWriteM = CSRWriteM && (PrivilegeModeW == `M_MODE); + assign CSRSWriteM = CSRWriteM && (|PrivilegeModeW); + assign CSRUWriteM = CSRWriteM; - csri csri(.*); - csrsr csrsr(.*); - csrc counters(.*); - csrm csrm(.*); // Machine Mode CSRs - csrs csrs(.*); - csrn csrn(.CSRNWriteM(CSRUWriteM), .*); // User Mode Exception Registers - csru csru(.*); // Floating Point Flags are part of User MOde + csri csri(.*); + csrsr csrsr(.*); + csrc counters(.*); + csrm csrm(.*); // Machine Mode CSRs + csrs csrs(.*); + csrn csrn(.CSRNWriteM(CSRUWriteM), .*); // User Mode Exception Registers + csru csru(.*); // Floating Point Flags are part of User MOde - // merge CSR Reads - assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM; - // *** add W stall 2/22/21 dh to try fixing memory stalls -// floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRReadValW); - flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); + // merge CSR Reads + assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM; + flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); - // merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient - assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) || - (CSRAdrM[9:8] == 2'b01 && PrivilegeModeW == `U_MODE); - assign IllegalCSRAccessM = ((IllegalCSRCAccessM && IllegalCSRMAccessM && - IllegalCSRSAccessM && IllegalCSRUAccessM && IllegalCSRNAccessM || - InsufficientCSRPrivilegeM) && CSRReadM) || IllegalCSRMWriteReadonlyM; - end else begin // CSRs not implemented - assign STATUS_MPP = 2'b11; - assign STATUS_SPP = 2'b0; - assign STATUS_TSR = 0; - assign MEPC_REGW = 0; - assign SEPC_REGW = 0; - assign UEPC_REGW = 0; - assign UTVEC_REGW = 0; - assign STVEC_REGW = 0; - assign MTVEC_REGW = 0; - assign MEDELEG_REGW = 0; - assign MIDELEG_REGW = 0; - assign SEDELEG_REGW = 0; - assign SIDELEG_REGW = 0; - assign SATP_REGW = 0; - assign MIP_REGW = 0; - assign MIE_REGW = 0; - assign STATUS_MIE = 0; - assign STATUS_SIE = 0; - assign FRM_REGW = 0; - assign CSRReadValM = 0; - assign IllegalCSRAccessM = CSRReadM; - end - endgenerate + // merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient + assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) || + (CSRAdrM[9:8] == 2'b01 && PrivilegeModeW == `U_MODE); + assign IllegalCSRAccessM = ((IllegalCSRCAccessM && IllegalCSRMAccessM && + IllegalCSRSAccessM && IllegalCSRUAccessM && IllegalCSRNAccessM || + InsufficientCSRPrivilegeM) && CSRReadM) || IllegalCSRMWriteReadonlyM; endmodule diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index c5bc8a45..c84a9d27 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -239,8 +239,6 @@ module privileged ( .ExceptionM, .PendingInterruptM, .PrivilegedNextPCM, .CauseM, .NextFaultMtvalM); - - endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 7be5b8c6..304ac1a9 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -198,7 +198,6 @@ module wallypipelinedhart ( ); // instruction fetch unit: PC, branch prediction, instruction cache - ieu ieu( .clk, .reset, @@ -276,7 +275,7 @@ module wallypipelinedhart ( .LSUStall); // change to LSUStall - + // *** Ross: please make EBU conditional when only supporting internal memories ahblite ebu(// IFU connections .clk, .reset, @@ -295,22 +294,7 @@ module wallypipelinedhart ( .HWRITED); - muldiv mdu( - .clk, .reset, - // Execute Stage interface - // .SrcAE, .SrcBE, - .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - .Funct3E, .Funct3M, - .MulDivE, .W64E, - // Writeback stage - .MulDivResultW, - // Divide Done - .DivBusyE, - // hazards - .StallM, .StallW, .FlushM, .FlushW - ); // multiply and divide unit - - hazard hzu( + hazard hzu( .BPPredWrongE, .CSRWritePendingDEM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MulDivStallD, .CSRRdStallD, .LSUStall, .ICacheStallF, @@ -323,57 +307,89 @@ module wallypipelinedhart ( .FlushF, .FlushD, .FlushE, .FlushM, .FlushW ); // global stall and flush control - // Priveleged block operates in M and W stages, handling CSRs and exceptions - privileged priv( - .clk, .reset, - .FlushD, .FlushE, .FlushM, .FlushW, - .StallD, .StallE, .StallM, .StallW, - .CSRReadM, .CSRWriteM, .SrcAM, .PCM, - .InstrM, .CSRReadValW, .PrivilegedNextPCM, - .RetM, .TrapM, - .ITLBFlushF, .DTLBFlushM, - .InstrValidM, .CommittedM, - .FRegWriteM, .LoadStallD, - .BPPredDirWrongM, .BTBPredPCWrongM, - .RASPredPCWrongM, .BPPredClassNonCFIWrongM, - .InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM, - .ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM, - .WalkerInstrPageFaultF, .WalkerLoadPageFaultM, .WalkerStorePageFaultM, - .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, - .LoadMisalignedFaultM, .StoreMisalignedFaultM, - .TimerIntM, .ExtIntM, .SwIntM, - .MTIME_CLINT, .MTIMECMP_CLINT, - .InstrMisalignedAdrM, .MemAdrM, - .SetFflagsM, - // Trap signals from pmp/pma in mmu - // *** do these need to be split up into one for dmem and one for ifu? - // instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem? - .InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM, - .ExceptionM, .PendingInterruptM, .IllegalFPUInstrE, - .PrivilegeModeW, .SATP_REGW, - .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .FRM_REGW,.BreakpointFaultM, .EcallFaultM - ); - + generate + if (`ZICSR_SUPPORTED) begin:priv + privileged priv( + .clk, .reset, + .FlushD, .FlushE, .FlushM, .FlushW, + .StallD, .StallE, .StallM, .StallW, + .CSRReadM, .CSRWriteM, .SrcAM, .PCM, + .InstrM, .CSRReadValW, .PrivilegedNextPCM, + .RetM, .TrapM, + .ITLBFlushF, .DTLBFlushM, + .InstrValidM, .CommittedM, + .FRegWriteM, .LoadStallD, + .BPPredDirWrongM, .BTBPredPCWrongM, + .RASPredPCWrongM, .BPPredClassNonCFIWrongM, + .InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM, + .ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM, + .WalkerInstrPageFaultF, .WalkerLoadPageFaultM, .WalkerStorePageFaultM, + .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, + .LoadMisalignedFaultM, .StoreMisalignedFaultM, + .TimerIntM, .ExtIntM, .SwIntM, + .MTIME_CLINT, .MTIMECMP_CLINT, + .InstrMisalignedAdrM, .MemAdrM, + .SetFflagsM, + // Trap signals from pmp/pma in mmu + // *** do these need to be split up into one for dmem and one for ifu? + // instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem? + .InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM, + .ExceptionM, .PendingInterruptM, .IllegalFPUInstrE, + .PrivilegeModeW, .SATP_REGW, + .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, + .FRM_REGW,.BreakpointFaultM, .EcallFaultM + ); + end else begin + assign CSRReadValW = 0; + assign PrivilegedNextPCM = 0; + assign RetM = 0; + assign TrapM = 0; + assign ITLBFlushF = 0; + assign DTLBFlushM = 0; + end + if (`M_SUPPORTED) begin:mdu + muldiv mdu( + .clk, .reset, + .ForwardedSrcAE, .ForwardedSrcBE, + .Funct3E, .Funct3M, .MulDivE, .W64E, + .MulDivResultW, .DivBusyE, + .StallM, .StallW, .FlushM, .FlushW + ); + end else begin // no M instructions supported + assign MulDivResultW = 0; + assign DivBusyE = 0; + end - fpu fpu( - .clk, .reset, - .FRM_REGW, // Rounding mode from CSR - .InstrD, // instruction from IFU - .ReadDataW,// Read data from memory - .ForwardedSrcAE, // Integer input being processed (from IEU) - .StallE, .StallM, .StallW, // stall signals from HZU - .FlushE, .FlushM, .FlushW, // flush signals from HZU - .RdM, .RdW, // which FP register to write to (from IEU) - .FRegWriteM, // FP register write enable - .FStallD, // Stall the decode stage - .FWriteIntE, // integer register write enable - .FWriteDataE, // Data to be written to memory - .FIntResM, // data to be written to integer register - .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - .SetFflagsM // FPU flags (to privileged unit) - ); // floating point unit - + if (`F_SUPPORTED) begin:fpu + fpu fpu( + .clk, .reset, + .FRM_REGW, // Rounding mode from CSR + .InstrD, // instruction from IFU + .ReadDataW,// Read data from memory + .ForwardedSrcAE, // Integer input being processed (from IEU) + .StallE, .StallM, .StallW, // stall signals from HZU + .FlushE, .FlushM, .FlushW, // flush signals from HZU + .RdM, .RdW, // which FP register to write to (from IEU) + .FRegWriteM, // FP register write enable + .FStallD, // Stall the decode stage + .FWriteIntE, // integer register write enable + .FWriteDataE, // Data to be written to memory + .FIntResM, // data to be written to integer register + .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) + .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + .SetFflagsM // FPU flags (to privileged unit) + ); // floating point unit + end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; + end + + endgenerate + // Priveleged block operates in M and W stages, handling CSRs and exceptions endmodule diff --git a/wally-pipelined/src/sdc/tb/ram2sdLoad.py b/wally-pipelined/testbench/sdc/ram2sdLoad.py similarity index 100% rename from wally-pipelined/src/sdc/tb/ram2sdLoad.py rename to wally-pipelined/testbench/sdc/ram2sdLoad.py diff --git a/wally-pipelined/src/sdc/tb/ramdisk2.hex b/wally-pipelined/testbench/sdc/ramdisk2.hex similarity index 100% rename from wally-pipelined/src/sdc/tb/ramdisk2.hex rename to wally-pipelined/testbench/sdc/ramdisk2.hex diff --git a/wally-pipelined/src/sdc/tb/run_tb.do b/wally-pipelined/testbench/sdc/run_tb.do similarity index 100% rename from wally-pipelined/src/sdc/tb/run_tb.do rename to wally-pipelined/testbench/sdc/run_tb.do diff --git a/wally-pipelined/src/sdc/tb/sd_top_tb.sv b/wally-pipelined/testbench/sdc/sd_top_tb.sv similarity index 100% rename from wally-pipelined/src/sdc/tb/sd_top_tb.sv rename to wally-pipelined/testbench/sdc/sd_top_tb.sv diff --git a/wally-pipelined/src/sdc/tb/wave.do b/wally-pipelined/testbench/sdc/wave.do similarity index 100% rename from wally-pipelined/src/sdc/tb/wave.do rename to wally-pipelined/testbench/sdc/wave.do diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index a64ca434..41e9480b 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -174,7 +174,7 @@ module testbench(); // Useful Aliases `define RF dut.hart.ieu.dp.regf.rf `define PC dut.hart.ifu.pcreg.q - `define CSR_BASE dut.hart.priv.csr.genblk1 + `define CSR_BASE dut.hart.priv.priv.csr `define HPMCOUNTER `CSR_BASE.counters.genblk1.HPMCOUNTER_REGW `define PMP_BASE `CSR_BASE.csrm.genblk4 `define PMPCFG genblk2.PMPCFGreg.q @@ -210,8 +210,8 @@ module testbench(); `define STATUS_MIE `CSR_BASE.csrsr.STATUS_MIE `define STATUS_SIE `CSR_BASE.csrsr.STATUS_SIE `define STATUS_UIE `CSR_BASE.csrsr.STATUS_UIE - `define PRIV dut.hart.priv.privmodereg.q - `define INSTRET dut.hart.priv.csr.genblk1.counters.genblk1.genblk2.INSTRETreg.q + `define PRIV dut.hart.priv.priv.privmodereg.q + `define INSTRET dut.hart.priv.priv.csr.counters.genblk1.genblk2.INSTRETreg.q // Common Macros `define checkCSR(CSR) \ begin \ @@ -308,9 +308,9 @@ module testbench(); integer ramFile; integer readResult; initial begin - force dut.hart.priv.SwIntM = 0; - force dut.hart.priv.TimerIntM = 0; - force dut.hart.priv.ExtIntM = 0; + force dut.hart.priv.priv.SwIntM = 0; + force dut.hart.priv.priv.TimerIntM = 0; + force dut.hart.priv.priv.ExtIntM = 0; $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootrom.bootrom.RAM, 'h1000 >> 3); $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.mem); @@ -365,7 +365,7 @@ module testbench(); // on the next falling edge the expected state is compared to the wally state. // step 0: read the expected state - assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.trap.InstrPageFaultM & ~dut.hart.priv.trap.InterruptM & ~dut.hart.StallM; + assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.priv.trap.InstrPageFaultM & ~dut.hart.priv.priv.trap.InterruptM & ~dut.hart.StallM; `define SCAN_NEW_INSTR_FROM_TRACE(STAGE) \ // always check PC, instruction bits \ if (checkInstrM) begin \ @@ -479,7 +479,7 @@ module testbench(); end else begin // update MIP immediately $display("%tns: Updating MIP to %x",$time,NextMIPexpected); MIPexpected = NextMIPexpected; - force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; + force dut.hart.priv.priv.csr.csri.MIP_REGW = MIPexpected; end // $display("%tn: ExpectedCSRArrayM = %p",$time,ExpectedCSRArrayM); // $display("%tn: ExpectedCSRArrayValueM = %p",$time,ExpectedCSRArrayValueM); @@ -491,11 +491,11 @@ module testbench(); // $display("%tn: ExpectedCSRArrayValueM[NumCSRM] %x",$time,ExpectedCSRArrayValueM[NumCSRM]); end if(RequestDelayedMIP & checkInstrM) begin - $display("%tns: Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.csr.genblk1.csrm.MEPC_REGW); + $display("%tns: Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.priv.csr.csrm.MEPC_REGW); $display("%tns: Updating MIP to %x",$time,NextMIPexpected); MIPexpected = NextMIPexpected; - force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; - $display("%tns: Finished Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.csr.genblk1.csrm.MEPC_REGW); + force dut.hart.priv.priv.csr.csri.MIP_REGW = MIPexpected; + $display("%tns: Finished Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.priv.csr.csrm.MEPC_REGW); RequestDelayedMIP = 0; end end @@ -576,7 +576,7 @@ module testbench(); `checkEQ("PCW",PCW,ExpectedPCW) //`checkEQ("InstrW",InstrW,ExpectedInstrW) <-- not viable because of // compressed to uncompressed conversion - `checkEQ("Instr Count",dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW,InstrCountW) + `checkEQ("Instr Count",dut.hart.priv.priv.csr.counters.genblk1.INSTRET_REGW,InstrCountW) #2; // delay 2 ns. if(`DEBUG_TRACE >= 5) begin $display("%tns, %d instrs: Reg Write Address %02d ? expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); @@ -601,19 +601,19 @@ module testbench(); // check csr for(NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) begin case(ExpectedCSRArrayW[NumCSRPostWIndex]) - "mhartid": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW) - "mstatus": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW) - "mtvec": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW) - "mip": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIP_REGW) - "mie": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIE_REGW) - "mideleg": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW) - "medeleg": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW) - "mepc": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MEPC_REGW) - "mtval": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW) - "sepc": `checkCSR(dut.hart.priv.csr.genblk1.csrs.SEPC_REGW) - "scause": `checkCSR(dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW) - "stvec": `checkCSR(dut.hart.priv.csr.genblk1.csrs.STVEC_REGW) - "stval": `checkCSR(dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW) + "mhartid": `checkCSR(dut.hart.priv.priv.csr.csrm.MHARTID_REGW) + "mstatus": `checkCSR(dut.hart.priv.priv.csr.csrm.MSTATUS_REGW) + "mtvec": `checkCSR(dut.hart.priv.priv.csr.csrm.MTVEC_REGW) + "mip": `checkCSR(dut.hart.priv.priv.csr.csrm.MIP_REGW) + "mie": `checkCSR(dut.hart.priv.priv.csr.csrm.MIE_REGW) + "mideleg": `checkCSR(dut.hart.priv.priv.csr.csrm.MIDELEG_REGW) + "medeleg": `checkCSR(dut.hart.priv.priv.csr.csrm.MEDELEG_REGW) + "mepc": `checkCSR(dut.hart.priv.priv.csr.csrm.MEPC_REGW) + "mtval": `checkCSR(dut.hart.priv.priv.csr.csrm.MTVAL_REGW) + "sepc": `checkCSR(dut.hart.priv.priv.csr.csrs.SEPC_REGW) + "scause": `checkCSR(dut.hart.priv.priv.csr.csrs.genblk1.SCAUSE_REGW) + "stvec": `checkCSR(dut.hart.priv.priv.csr.csrs.STVEC_REGW) + "stval": `checkCSR(dut.hart.priv.priv.csr.csrs.genblk1.STVAL_REGW) endcase end if (fault == 1) begin @@ -667,7 +667,7 @@ module testbench(); begin int i; // Grab the SATP register from privileged unit - SATP = dut.hart.priv.csr.SATP_REGW; + SATP = dut.hart.priv.priv.csr.SATP_REGW; // Split the virtual address into page number segments and offset VPN[2] = adrIn[38:30]; VPN[1] = adrIn[29:21]; @@ -677,7 +677,7 @@ module testbench(); SvMode = SATP[63]; // Only perform translation if translation is on and the processor is not // in machine mode - if (SvMode && (dut.hart.priv.PrivilegeModeW != `M_MODE)) begin + if (SvMode && (dut.hart.priv.priv.PrivilegeModeW != `M_MODE)) begin BaseAdr = SATP[43:0] << 12; for (i = 2; i >= 0; i--) begin PAdr = BaseAdr + (VPN[i] << 3); diff --git a/wally-pipelined/testbench/testbench.sv b/wally-pipelined/testbench/testbench.sv index 7301f454..ef0c0692 100644 --- a/wally-pipelined/testbench/testbench.sv +++ b/wally-pipelined/testbench/testbench.sv @@ -287,7 +287,7 @@ logic [3:0] dummy; // Termination condition // terminate on a specific ECALL for Imperas tests, or on a jump to self infinite loop for RISC-V Arch tests - assign DCacheFlushStart = dut.hart.priv.EcallFaultM && + assign DCacheFlushStart = dut.hart.priv.priv.EcallFaultM && (dut.hart.ieu.dp.regf.rf[3] == 1 || (dut.hart.ieu.dp.regf.we3 && dut.hart.ieu.dp.regf.a3 == 3 && @@ -318,7 +318,7 @@ module riscvassertions; initial begin assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); assert (`DIV_BITSPERCYCLE == 1 || `DIV_BITSPERCYCLE==2 || `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double without supporting float"); + assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)"); assert (`XLEN == 64 || ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32"); assert (`DCACHE_WAYSIZEINBYTES <= 4096 || `MEM_DCACHE == 0 || `MEM_VIRTMEM == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); assert (`DCACHE_BLOCKLENINBITS >= 128 || `MEM_DCACHE == 0) else $error("DCACHE_BLOCKLENINBITS must be at least 128 when caches are enabled");