diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index ee028eb32..a3b7f0c2c 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit ee028eb325525148a34420a4ca7959b24220a91e +Subproject commit a3b7f0c2cf89652b8a0cba3146890c512ff8ba44 diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 0156dc9fd..5b131237a 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -156,7 +156,7 @@ def GeometricAverage(benchmarks, field): return Product ** (1.0/index) def ComputeGeometricAverage(benchmarks): - fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR'] + fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR', 'CPI'] AllAve = {} for field in fields: Product = 1 @@ -247,20 +247,34 @@ if(sys.argv[1] == '-b'): currPercent.append(percent) dct[PredType] = (currSize, currPercent) print(dct) + fig, axes = plt.subplots() + marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x'} + colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue'} for cat in dct: (x, y) = dct[cat] - plt.scatter(x, y, label='k') - plt.plot(x, y) - plt.ylabel('Prediction Accuracy') - plt.xlabel('Size (b or k)') - plt.legend(loc='upper left') + x=[int(2**int(v)) for v in x] + print(x, y) + axes.plot(x,y, color=colors[cat]) + axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat]) + #plt.scatter(x, y, label=cat) + #plt.plot(x, y) + #axes.set_xticks([4, 6, 8, 10, 12, 14]) + axes.legend(loc='upper left') + axes.set_xscale("log") + axes.set_ylabel('Prediction Accuracy') + axes.set_xlabel('Entries') + axes.set_xticks([64, 256, 1024, 4096, 16384, 65536]) + axes.set_xticklabels([64, 256, 1024, 4096, 16384, 65536]) + axes.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5) plt.show() else: # steps 1 and 2 benchmarks = ProcessFile(sys.argv[1]) - ComputeAverage(benchmarks) + print(benchmarks[0]) + ComputeAll(benchmarks) + ComputeGeometricAverage(benchmarks) # 3 process into useful data # cache hit rates # cache fill time @@ -268,7 +282,6 @@ else: # hazard counts # CPI # instruction distribution - ComputeAll(benchmarks) for benchmark in benchmarks: printStats(benchmark) diff --git a/config/buildroot/wally-config.vh b/config/buildroot/wally-config.vh index bfe69e84d..3a68571dd 100644 --- a/config/buildroot/wally-config.vh +++ b/config/buildroot/wally-config.vh @@ -135,7 +135,8 @@ `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 1 +`define SVADU_SUPPORTED 1 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/fpga/wally-config.vh b/config/fpga/wally-config.vh index 3ae91e3a7..1f7447f4d 100644 --- a/config/fpga/wally-config.vh +++ b/config/fpga/wally-config.vh @@ -144,7 +144,8 @@ `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 1 +`define SVADU_SUPPORTED 1 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32e/wally-config.vh b/config/rv32e/wally-config.vh index 6e0de3347..aee0e5410 100644 --- a/config/rv32e/wally-config.vh +++ b/config/rv32e/wally-config.vh @@ -138,7 +138,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index 1fb89abf1..2e06c03d4 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -134,10 +134,11 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT -`define BPRED_SIZE 10 +`define BPRED_SIZE 16 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32i/wally-config.vh b/config/rv32i/wally-config.vh index efbf6e7c0..d75d0c462 100644 --- a/config/rv32i/wally-config.vh +++ b/config/rv32i/wally-config.vh @@ -138,7 +138,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv32imc/wally-config.vh b/config/rv32imc/wally-config.vh index 8fb29a678..42442d46e 100644 --- a/config/rv32imc/wally-config.vh +++ b/config/rv32imc/wally-config.vh @@ -137,7 +137,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64fpquad/wally-config.vh b/config/rv64fpquad/wally-config.vh index dd8058c28..34d7628e0 100644 --- a/config/rv64fpquad/wally-config.vh +++ b/config/rv64fpquad/wally-config.vh @@ -140,7 +140,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64gc/wally-config.vh b/config/rv64gc/wally-config.vh index c65b7105e..fe6ecdc67 100644 --- a/config/rv64gc/wally-config.vh +++ b/config/rv64gc/wally-config.vh @@ -140,7 +140,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/config/rv64i/wally-config.vh b/config/rv64i/wally-config.vh index a3702c3fd..34c37f73a 100644 --- a/config/rv64i/wally-config.vh +++ b/config/rv64i/wally-config.vh @@ -140,7 +140,8 @@ `define BPRED_SIZE 10 `define BTB_SIZE 10 -`define HPTW_WRITES_SUPPORTED 0 +`define SVADU_SUPPORTED 0 +`define ZMMUL_SUPPORTED 0 // FPU division architecture `define RADIX 32'h4 diff --git a/sim/imperas.ic b/sim/imperas.ic index d28234bbc..5e3357eac 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -1,6 +1,11 @@ +#--showoverrides +#--help --helpall +--traceregs + --override cpu/show_c_prefix=T --override cpu/unaligned=F --override cpu/ignore_non_leaf_DAU=1 +--override cpu/wfi_is_nop=T # Enable the Imperas instruction coverage #-extlib refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0 @@ -33,3 +38,6 @@ # ignore settings of bits DAU for non leaf page table walks --override cpu/ignore_non_leaf_DAU=1 + +# mimpid = 0x100 +--override cpu/mimpid=0x100 diff --git a/sim/sim-imperas b/sim/sim-imperas index b6a7f1c47..aa1dc3a01 100755 --- a/sim/sim-imperas +++ b/sim/sim-imperas @@ -29,4 +29,4 @@ IMPERAS_TOOLS=$(pwd)/imperas.ic \ OTHERFLAGS="+TRACE2LOG_ENABLE=1 VERBOSE=1" \ TESTDIR=${WALLY}/tests/riscof/work/wally-riscv-arch-test/rv64i_m/privilege/src/Lee.S/ \ -vsim -do "do wally-pipelined-imperas.do rv64gc" +vsim -do "do wally-imperas.do rv64gc" diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 650e8367d..85d23d373 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -30,7 +30,7 @@ module hazard ( // Detect hazards - input logic BPPredWrongE, CSRWriteFenceM, RetM, TrapM, + input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, input logic LSUStallM, IFUStallF, input logic FCvtIntStallD, FPUStallD, @@ -65,8 +65,8 @@ module hazard ( // Similarly, CSR writes and fences flush all subsequent instructions and refetch them in light of the new operating modes and cache/TLB contents // Branch misprediction is found in the Execute stage and must flush the next two instructions. // However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divde must still complete - assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPPredWrongE; - assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPPredWrongE & ~(DivBusyE | FDivBusyE)); + assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; + assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); assign FlushMCause = TrapM | RetM | CSRWriteFenceM; assign FlushWCause = TrapM; diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index e39c5cacf..14be19664 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -38,7 +38,9 @@ module controller( output logic [2:0] ImmSrcD, // Type of immediate extension input logic IllegalIEUFPUInstrD, // Illegal IEU and FPU instruction output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers - // Execute stage control signals + output logic JumpD, // Jump instruction + output logic BranchD, // Branch instruction + // Execute stage control signals input logic StallE, FlushE, // Stall, flush Execute stage input logic [1:0] FlagsE, // Comparison flags ({eq, lt}) input logic FWriteIntE, // Write integer register, coming from FPU controller @@ -52,7 +54,8 @@ module controller( output logic IntDivE, // Integer divide output logic MDUE, // MDU (multiply/divide) operatio output logic W64E, // RV64 W-type operation - output logic JumpE, // jump instruction + output logic JumpE, // jump instruction + output logic BranchE, // Branch instruction output logic SCE, // Store Conditional instruction output logic BranchSignedE, // Branch comparison operands are signed (if it's a branch) output logic [3:0] BSelectE, // One-Hot encoding of if it's ZBA_ZBB_ZBC_ZBS instruction @@ -66,9 +69,7 @@ module controller( output logic RegWriteM, // Instruction writes a register (needed for Hazard unit) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid - output logic BranchD, BranchE, - output logic JumpD, - + output logic FenceM, // Fence instruction output logic FWriteIntM, // FPU controller writes integer register file // Writeback stage control signals input logic StallW, FlushW, // Stall, flush Writeback stage @@ -116,7 +117,7 @@ module controller( logic IEURegWriteE; // Register write logic IllegalERegAdrD; // RV32E attempts to write upper 16 registers logic [1:0] AtomicE; // Atomic instruction - logic FenceD, FenceE, FenceM; // Fence instruction + logic FenceD, FenceE; // Fence instruction logic SFenceVmaD; // sfence.vma instruction logic IntDivM; // Integer divide instruction logic [3:0] BSelectD; // One-Hot encoding if it's ZBA_ZBB_ZBC_ZBS instruction in decode stage @@ -162,14 +163,14 @@ module controller( ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction 7'b0110011: if (Funct7D == 7'b0000000 | Funct7D == 7'b0100000 | ((`ZBB_SUPPORTED & BSelectD[2]) | (`ZBC_SUPPORTED & BSelectD[1]) | (`ZBS_SUPPORTED & BSelectD[0]) | (`ZBA_SUPPORTED & BSelectD[3]))) ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_0_0_0_0_0_00_0; // R-type - else if (Funct7D == 7'b0000001 & `M_SUPPORTED) + else if (Funct7D == 7'b0000001 & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2]))) ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_0_0_0_0_1_00_0; // Multiply/divide else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction 7'b0110111: ControlsD = `CTRLW'b1_100_01_00_000_0_0_0_1_0_0_0_0_0_00_0; // lui 7'b0111011: if ((Funct7D == 7'b0000000 | Funct7D == 7'b0100000 | (`ZBA_SUPPORTED & BSelectD[3]) | (`ZBB_SUPPORTED & BSelectD[2])) & `XLEN == 64) ControlsD = `CTRLW'b1_000_00_00_000_0_1_0_0_1_0_0_0_0_00_0; // R-type W instructions for RV64i - else if (Funct7D == 7'b0000001 & `M_SUPPORTED & `XLEN == 64) + else if (Funct7D == 7'b0000001 & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])) & `XLEN == 64) ControlsD = `CTRLW'b1_000_00_00_011_0_0_0_0_1_0_0_0_1_00_0; // W-type Multiply/Divide else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // Non-implemented instruction diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 43c16554d..e26622f96 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -71,7 +71,8 @@ module ieu ( output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit output logic MDUStallD, CSRRdStallD, StoreStallD, output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction - output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions + output logic CSRWriteFenceM, // CSR write or fence instruction needs to flush subsequent instructions + output logic FenceM ); logic [2:0] ImmSrcD; // Select type of immediate extension @@ -102,7 +103,7 @@ controller c( .Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .BSelectE, .ZBBSelectE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, - .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD); + .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .FenceM, .StoreStallD); datapath dp( .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index 330607af4..d5fd0c019 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -33,11 +33,11 @@ module RASPredictor #(parameter int StackSize = 16 )( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM, - input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong - input logic [3:0] InstrClassD, - input logic [3:0] InstrClassE, // Instr class - input logic [3:0] PredInstrClassF, - input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal + input logic BPReturnWrongD, // Prediction class is wrong + input logic ReturnD, + input logic ReturnE, CallE, // Instr class + input logic BPReturnF, + input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a call output logic [`XLEN-1:0] RASPCF // Top of the stack ); @@ -54,21 +54,21 @@ module RASPredictor #(parameter int StackSize = 16 )( logic IncrRepairD, DecRepairD; logic DecrementPtr; - logic FlushedRetDE; - logic WrongPredRetD; + logic FlushedReturnDE; + logic WrongPredReturnD; - assign PopF = PredInstrClassF[2] & ~StallD & ~FlushD; - assign PushE = InstrClassE[3] & ~StallM & ~FlushM; + assign PopF = BPReturnF & ~StallD & ~FlushD; + assign PushE = CallE & ~StallM & ~FlushM; - assign WrongPredRetD = (WrongPredInstrClassD[2]) & ~StallE & ~FlushE; - assign FlushedRetDE = (~StallE & FlushE & InstrClassD[2]) | (~StallM & FlushM & InstrClassE[2]); // flushed ret + assign WrongPredReturnD = (BPReturnWrongD) & ~StallE & ~FlushE; + assign FlushedReturnDE = (~StallE & FlushE & ReturnD) | (~StallM & FlushM & ReturnE); // flushed return - assign RepairD = WrongPredRetD | FlushedRetDE ; + assign RepairD = WrongPredReturnD | FlushedReturnDE ; - assign IncrRepairD = FlushedRetDE | (WrongPredRetD & ~InstrClassD[2]); // Guessed it was a ret, but its not + assign IncrRepairD = FlushedReturnDE | (WrongPredReturnD & ~ReturnD); // Guessed it was a return, but its not - assign DecRepairD = WrongPredRetD & InstrClassD[2]; // Guessed non ret but is a ret. + assign DecRepairD = WrongPredReturnD & ReturnD; // Guessed non return but is a return. assign CounterEn = PopF | PushE | RepairD; diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index ec974d14c..f2f16b51e 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -39,7 +39,7 @@ module bpred ( input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class input logic [`XLEN-1:0] PCNextF, // Next Fetch Address input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 - output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction + output logic [`XLEN-1:0] PC1NextF, // Branch Predictor predicted or corrected fetch address on miss prediction output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage // Update Predictor @@ -58,67 +58,76 @@ module bpred ( input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) - output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br // Report branch prediction status - output logic BPPredWrongE, // Prediction is wrong - output logic BPPredWrongM, // Prediction is wrong - output logic DirPredictionWrongM, // Prediction direction is wrong - output logic BTBPredPCWrongM, // Prediction target wrong + output logic BPWrongE, // Prediction is wrong + output logic BPWrongM, // Prediction is wrong + output logic BPDirPredWrongM, // Prediction direction is wrong + output logic BTAWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic PredictionInstrClassWrongM // Class prediction is wrong + output logic IClassWrongM // Class prediction is wrong ); - logic [1:0] DirPredictionF; + logic [1:0] BPDirPredF; - logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD; - logic [`XLEN-1:0] BTAF, RASPCF; - logic PredictionPCWrongE; - logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; - logic [3:0] InstrClassD; - logic [3:0] InstrClassE; - logic DirPredictionWrongE; + logic [`XLEN-1:0] BTAF, RASPCF; + logic BPPCWrongE; + logic IClassWrongE; + logic BPDirPredWrongE; - logic SelBPPredF; - logic [`XLEN-1:0] BPPredPCF; - logic [`XLEN-1:0] PCNext0F; - logic [`XLEN-1:0] PCCorrectE; - logic [3:0] WrongPredInstrClassD; + logic BPPCSrcF; + logic [`XLEN-1:0] BPPCF; + logic [`XLEN-1:0] PC0NextF; + logic [`XLEN-1:0] PCCorrectE; + logic [3:0] WrongPredInstrClassD; - logic BTBTargetWrongE; - logic RASTargetWrongE; + logic BTBTargetWrongE; + logic RASTargetWrongE; - logic [`XLEN-1:0] BTAD; + logic [`XLEN-1:0] BTAD; + logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF; + logic BPBranchF, BPJumpF, BPReturnF, BPCallF; + logic BPBranchD, BPJumpD, BPReturnD, BPCallD; + logic ReturnD, CallD; + logic ReturnE, CallE; + logic BranchM, JumpM, ReturnM, CallM; + logic BranchW, JumpW, ReturnW, CallW; + logic BPReturnWrongD; + logic [`XLEN-1:0] BTAE; + + + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor - twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + twoBitPredictor #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, + .FlushD, .FlushE, .FlushM, .FlushW, + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE") begin:Predictor gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE, + .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL") begin:Predictor gshare #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), + .PCNextF, .PCF, .PCD, .PCE, .PCM, .BPDirPredF, .BPDirPredWrongE, + .BPBranchF, .BranchD, .BranchE, .BranchM, .BranchW, .PCSrcE); end else if (`BPRED_TYPE == "BP_GSHARE_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BP_GLOBAL_BASIC") begin:Predictor gsharebasic #(`BPRED_SIZE, 0) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, - .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, - .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE); + .PCNextF, .PCM, .BPDirPredF, .BPDirPredWrongE, + .BranchE, .BranchM, .PCSrcE); end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor // *** Fix me @@ -126,7 +135,7 @@ module bpred ( localHistoryPredictor DirPredictor(.clk, .reset, .StallF, .StallE, .LookUpPC(PCNextF), - .Prediction(DirPredictionF), + .Prediction(BPDirPredF), // update .UpdatePC(PCE), .UpdateEN(InstrClassE[0] & ~StallE), @@ -141,139 +150,78 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, - .BTAF, .BTAD, - .BTBPredInstrClassF, - .PredictionInstrClassWrongM, + .BTAF, .BTAD, .BTAE, + .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), + .IClassWrongM, .IClassWrongE, .IEUAdrE, .IEUAdrM, - .InstrClassD, .InstrClassE, .InstrClassM); + .InstrClassD({CallD, ReturnD, JumpD, BranchD}), + .InstrClassE({CallE, ReturnE, JumpE, BranchE}), + .InstrClassM({CallM, ReturnM, JumpM, BranchM}), + .InstrClassW({CallW, ReturnW, JumpW, BranchW})); - // the branch predictor needs a compact decoding of the instruction class. - if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode - logic [3:0] InstrClassF; - logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; - logic JumpF, BranchF; + icpred #(`INSTR_CLASS_PRED) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, + .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, + .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .IClassWrongM, .IClassWrongE, .BPReturnWrongD); - if(`C_SUPPORTED) begin - logic [4:0] CompressedOpcF; - assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; - assign cjal = CompressedOpcF == 5'h09 & `XLEN == 32; - assign cj = CompressedOpcF == 5'h0d; - assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign CJumpF = cjal | cj | cjr | cjalr; - assign CBranchF = CompressedOpcF[4:1] == 4'h7; - end else begin - assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0; - end - - assign JumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; - assign BranchF = PostSpillInstrRawF[6:0] == 7'h63; - - assign InstrClassF[0] = BranchF | (`C_SUPPORTED & CBranchF); - assign InstrClassF[1] = JumpF | (`C_SUPPORTED & (CJumpF)); - assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5 - (`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); - - assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5 - (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); - - assign PredInstrClassF = InstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1]; - end else begin - assign PredInstrClassF = BTBPredInstrClassF; - assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) | - PredInstrClassF[1]; - end - // Part 3 RAS RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .PredInstrClassF, .InstrClassD, .InstrClassE, - .WrongPredInstrClassD, .RASPCF, .PCLinkE); + .BPReturnF, .ReturnD, .ReturnE, .CallE, + .BPReturnWrongD, .RASPCF, .PCLinkE); - assign BPPredPCF = PredInstrClassF[2] ? RASPCF : BTAF; - - assign InstrClassD[0] = BranchD; - assign InstrClassD[1] = JumpD ; - assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 - assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 - - flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); - flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); - flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); - - // branch predictor - flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, AnyWrongPredInstrClassE, PredictionInstrClassWrongM); - - // pipeline the class - flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD); - flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE); - // Check the prediction // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. // if the class prediction is wrong a regular instruction may have been predicted as a taken branch // this will result in PCD not being equal to the fall through address PCLinkE (PCE+4). // The next instruction is always valid as no other flush would occur at the same time as the branch and not // also flush the branch. This will change in a superscaler cpu. - assign PredictionPCWrongE = PCCorrectE != PCD; - - // branch class prediction wrong. - assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0]; - assign AnyWrongPredInstrClassD = |WrongPredInstrClassD; - // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. - //assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE)); - assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; - - logic BPPredWrongEAlt; - logic NotMatch; - assign BPPredWrongEAlt = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark - assign NotMatch = BPPredWrongE != BPPredWrongEAlt; - + assign BPWrongE = (PCCorrectE != PCD) & InstrValidE & InstrValidD; + flopenrc #(1) BPWrongMReg(clk, reset, FlushM, ~StallM, BPWrongE, BPWrongM); + // Output the predicted PC or corrected PC on miss-predict. + assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; + mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF); // Selects the BP or PC+2/4. - mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, SelBPPredF, PCNext0F); + mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF); // If the prediction is wrong select the correct address. - mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); + mux2 #(`XLEN) pcmux1(PC0NextF, PCCorrectE, BPWrongE, PC1NextF); // Correct branch/jump target. mux2 #(`XLEN) pccorrectemux(PCLinkE, IEUAdrE, PCSrcE, PCCorrectE); // If the fence/csrw was predicted as a taken branch then we select PCF, rather PCE. // Effectively this is PCM+4 or the non-existant PCLinkM - if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPPredWrongM, NextValidPCE); + if(`INSTR_CLASS_PRED) mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(PCE, PCF, BPWrongM, NextValidPCE); else assign NextValidPCE = PCE; if(`ZICOUNTERS_SUPPORTED) begin - logic JumpOrTakenBranchE; - logic [`XLEN-1:0] BTAE, RASPCD, RASPCE; - logic BTBPredPCWrongE, RASPredPCWrongE; - // performance counters - // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now - // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) - // 3. target ras (ras target wrong / class[2]) - // 4. direction (br dir wrong / class[0]) + logic [`XLEN-1:0] RASPCD, RASPCE; + logic BTBPredPCWrongE, RASPredPCWrongE; + // performance counters + // 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now + // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) + // 3. target ras (ras target wrong / class[2]) + // 4. direction (br dir wrong / class[0]) - // Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction - // could be wrong or the fall through address selected for branch predict not taken. - // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of - // both without the above inaccuracies. - assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE; - assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE; + // Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction + // could be wrong or the fall through address selected for branch predict not taken. + // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of + // both without the above inaccuracies. + // **** use BTAWrongM from BTB. + assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; + assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; - assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1]; - - flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - - flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); - - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); - flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); - flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, - {DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE}, - {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM}); - + flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); + flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); + flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, + {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, + {BPDirPredWrongM, BTAWrongM, RASPredPCWrongM}); + end else begin - assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; + assign {BTAWrongM, RASPredPCWrongM} = '0; end + + // **** Fix me + assign InstrClassM = {CallM, ReturnM, JumpM, BranchM}; endmodule diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index 2bb006714..b14399704 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -34,26 +34,33 @@ module btb #(parameter Depth = 10 ) ( input logic clk, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, - input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages + input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC - output logic [`XLEN-1:0] BTAD, - output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class + output logic [`XLEN-1:0] BTAD, + output logic [`XLEN-1:0] BTAE, + output logic [3:0] BTBIClassF, // BTB's guess at instruction class // update - input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong + input logic IClassWrongM, // BTB's instruction class guess was wrong + input logic IClassWrongE, input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb input logic [3:0] InstrClassD, // Instruction class to insert into btb input logic [3:0] InstrClassE, // Instruction class to insert into btb - input logic [3:0] InstrClassM // Instruction class to insert into btb + input logic [3:0] InstrClassM, // Instruction class to insert into btb + input logic [3:0] InstrClassW ); - logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex; - logic [`XLEN-1:0] ResetPC; - logic MatchF, MatchD, MatchE, MatchM, MatchNextX, MatchXF; - logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; - logic [`XLEN+3:0] TableBTBPredictionF; - logic UpdateEn; - + logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; + logic [`XLEN-1:0] ResetPC; + logic MatchD, MatchE, MatchM, MatchW, MatchX; + logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; + logic [`XLEN+3:0] TableBTBPredF; + logic [`XLEN-1:0] IEUAdrW; + logic [`XLEN-1:0] PCW; + logic BTBWrongE, BTAWrongE; + logic BTBWrongM, BTAWrongM; + + // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if @@ -62,6 +69,7 @@ module btb #(parameter Depth = 10 ) ( assign PCDIndex = {PCD[Depth+1] ^ PCD[1], PCD[Depth:2]}; assign PCEIndex = {PCE[Depth+1] ^ PCE[1], PCE[Depth:2]}; assign PCMIndex = {PCM[Depth+1] ^ PCM[1], PCM[Depth:2]}; + assign PCWIndex = {PCW[Depth+1] ^ PCW[1], PCW[Depth:2]}; // must output a valid PC and valid bit during reset. Because only PCF, not PCNextF is reset, PCNextF is invalid // during reset. The BTB must produce a non X PC1NextF to allow the simulation to run. @@ -70,31 +78,38 @@ module btb #(parameter Depth = 10 ) ( assign ResetPC = `RESET_VECTOR; assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]}; - assign MatchF = PCNextFIndex == PCFIndex; - assign MatchD = PCNextFIndex == PCDIndex; - assign MatchE = PCNextFIndex == PCEIndex; - assign MatchM = PCNextFIndex == PCMIndex; - assign MatchNextX = MatchF | MatchD | MatchE | MatchM; - - flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + assign MatchD = PCFIndex == PCDIndex; + assign MatchE = PCFIndex == PCEIndex; + assign MatchM = PCFIndex == PCMIndex; + assign MatchW = PCFIndex == PCWIndex; + assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardBTBPrediction = MatchF ? {BTBPredInstrClassF, BTAF} : - MatchD ? {InstrClassD, BTAD} : - MatchE ? {InstrClassE, IEUAdrE} : - {InstrClassM, IEUAdrM} ; + assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} : + MatchE ? {InstrClassE, IEUAdrE} : + MatchM ? {InstrClassM, IEUAdrM} : + {InstrClassW, IEUAdrW} ; - flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); + assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; - assign {BTBPredInstrClassF, BTAF} = MatchXF ? ForwardBTBPredictionF : {TableBTBPredictionF}; - - - assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( - .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF), - .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); + .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), + .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); + // BTAE is not strickly necessary. However it is used by two parts of wally. + // 1. It gates updates to the BTB when the prediction does not change. This save power. + // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. + flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); + assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); + + flopenrc #(1) BTAWrongMReg(clk, reset, FlushM, ~StallM, BTAWrongE, BTAWrongM); + assign BTBWrongM = BTAWrongM | IClassWrongM; + + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); + flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); + + endmodule diff --git a/src/ifu/bpred/gshare.sv b/src/ifu/bpred/gshare.sv index 5332ce5cd..66b2b4842 100644 --- a/src/ifu/bpred/gshare.sv +++ b/src/ifu/bpred/gshare.sv @@ -35,20 +35,20 @@ module gshare #(parameter k = 10, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, // update input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, - input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, PCSrcE + input logic BPBranchF, BranchD, BranchE, BranchM, BranchW, PCSrcE ); - logic MatchF, MatchD, MatchE, MatchM; - logic MatchNextX, MatchXF; + logic MatchF, MatchD, MatchE, MatchM, MatchW; + logic MatchX; - logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE, ForwardNewDirPrediction, ForwardDirPredictionF; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] TableBPDirPredF, BPDirPredD, BPDirPredE, FwdNewDirPredF; + logic [1:0] NewBPDirPredE, NewBPDirPredM, NewBPDirPredW; - logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM; + logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE, IndexM, IndexW; logic [k-1:0] GHRF, GHRD, GHRE, GHRM; logic [k-1:0] GHRNextM, GHRNextF; @@ -68,48 +68,47 @@ module gshare #(parameter k = 10, assign IndexM = GHRM; end - assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); - assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD); - assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); - assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM); - assign MatchNextX = MatchF | MatchD | MatchE | MatchM; + flopenrc #(k) IndexWReg(clk, reset, FlushW, ~StallW, IndexM, IndexW); - flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); + assign MatchD = BranchD & ~FlushE & (IndexF == IndexD); + assign MatchE = BranchE & ~FlushM & (IndexF == IndexE); + assign MatchM = BranchM & ~FlushW & (IndexF == IndexM); + assign MatchW = BranchW & ~FlushW & (IndexF == IndexW); + assign MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardNewDirPrediction = MatchF ? {2{DirPredictionF[1]}} : - MatchD ? {2{DirPredictionD[1]}} : - MatchE ? {NewDirPredictionE} : - NewDirPredictionM ; + assign FwdNewDirPredF = MatchD ? {2{BPDirPredD[1]}} : + MatchE ? {NewBPDirPredE} : + MatchM ? {NewBPDirPredM} : + NewBPDirPredW ; - flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); - - assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF; + assign BPDirPredF = MatchX ? FwdNewDirPredF : TableBPDirPredF; ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), - .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(TableDirPredictionF), + .rd1(TableBPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), - .we2(BranchInstrM), + .wd2(NewBPDirPredM), + .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); + flopenrc #(2) NewPredictionRegW(clk, reset, FlushW, ~StallW, NewBPDirPredM, NewBPDirPredW); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; - assign GHRNextF = BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} : GHRF; - assign GHRF = BranchInstrD ? {DirPredictionD[1], GHRD[k-1:1]} : GHRD; - assign GHRD = BranchInstrE ? {PCSrcE, GHRE[k-1:1]} : GHRE; - assign GHRE = BranchInstrM ? {PCSrcM, GHRM[k-1:1]} : GHRM; + assign GHRNextF = BPBranchF ? {BPDirPredF[1], GHRF[k-1:1]} : GHRF; + assign GHRF = BranchD ? {BPDirPredD[1], GHRD[k-1:1]} : GHRD; + assign GHRD = BranchE ? {PCSrcE, GHRE[k-1:1]} : GHRE; + assign GHRE = BranchM ? {PCSrcM, GHRM[k-1:1]} : GHRM; assign GHRNextM = {PCSrcM, GHRM[k-1:1]}; - flopenr #(k) GHRReg(clk, reset, ~StallW & ~FlushW & BranchInstrM, GHRNextM, GHRM); + flopenr #(k) GHRReg(clk, reset, ~StallW & ~FlushW & BranchM, GHRNextM, GHRM); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); endmodule diff --git a/src/ifu/bpred/gsharebasic.sv b/src/ifu/bpred/gsharebasic.sv index cb0bbe9eb..130f17328 100644 --- a/src/ifu/bpred/gsharebasic.sv +++ b/src/ifu/bpred/gsharebasic.sv @@ -35,16 +35,16 @@ module gsharebasic #(parameter k = 10, input logic reset, input logic StallF, StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, // update input logic [`XLEN-1:0] PCNextF, PCM, - input logic BranchInstrE, BranchInstrM, PCSrcE + input logic BranchE, BranchM, PCSrcE ); logic [k-1:0] IndexNextF, IndexM; - logic [1:0] DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] BPDirPredD, BPDirPredE; + logic [1:0] NewBPDirPredE, NewBPDirPredM; logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR; logic [k-1:0] GHRNext; @@ -61,22 +61,22 @@ module gsharebasic #(parameter k = 10, ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(DirPredictionF), + .rd1(BPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), - .we2(BranchInstrM), + .wd2(NewBPDirPredM), + .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; - assign GHRNext = BranchInstrM ? {PCSrcM, GHR[k-1:1]} : GHR; - flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchInstrM, GHRNext, GHR); + assign GHRNext = BranchM ? {PCSrcM, GHR[k-1:1]} : GHR; + flopenr #(k) GHRReg(clk, reset, ~StallM & ~FlushM & BranchM, GHRNext, GHR); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF); diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv new file mode 100644 index 000000000..14e7c8d89 --- /dev/null +++ b/src/ifu/bpred/icpred.sv @@ -0,0 +1,106 @@ +/////////////////////////////////////////// +// icpred.sv +// +// Written: Ross Thomposn ross1728@gmail.com +// Created: February 26, 2023 +// Modified: February 26, 2023 +// +// Purpose: Partial decode of instructions into control flow instructions (cfi)P +// Call, Return, Jump, and Branch +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + + +module icpred #(parameter INSTR_CLASS_PRED = 1)( + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, + input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction + input logic BranchD, BranchE, + input logic JumpD, JumpE, + output logic BranchM, BranchW, + output logic JumpM, JumpW, + output logic CallD, CallE, CallM, CallW, + output logic ReturnD, ReturnE, ReturnM, ReturnW, + input logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF, + output logic BPCallF, BPReturnF, BPJumpF, BPBranchF, + output logic IClassWrongM, BPReturnWrongD, IClassWrongE +); + + logic IClassWrongD; + logic BPBranchD, BPJumpD, BPReturnD, BPCallD; + + if (!INSTR_CLASS_PRED) begin : DirectClassDecode + // This section is mainly for testing, verification, and PPA comparison. + // An alternative to using the BTB to store the instruction class is to partially decode + // the instructions in the Fetch stage into, Call, Return, Jump, and Branch instructions. + // This logic is not described in the text book as of 23 February 2023. + logic ccall, cj, cjr, ccallr, CJumpF, CBranchF; + logic NCJumpF, NCBranchF; + + if(`C_SUPPORTED) begin + logic [4:0] CompressedOpcF; + assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; + assign ccall = CompressedOpcF == 5'h09 & `XLEN == 32; + assign cj = CompressedOpcF == 5'h0d; + assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign CJumpF = ccall | cj | cjr | ccallr; + assign CBranchF = CompressedOpcF[4:1] == 4'h7; + end else begin + assign {ccall, cj, cjr, ccallr, CJumpF, CBranchF} = '0; + end + + assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; + assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; + + assign BPBranchF = NCBranchF | (`C_SUPPORTED & CBranchF); + assign BPJumpF = NCJumpF | (`C_SUPPORTED & (CJumpF)); + assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5 + (`C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + + assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5 + (`C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + + end else begin + // This section connects the BTB's instruction class prediction. + assign {BPCallF, BPReturnF, BPJumpF, BPBranchF} = {BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}; + end + + assign ReturnD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // returnurn must returnurn to ra or x5 + assign CallD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // call(r) must link to ra or x5 + + flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {CallD, ReturnD}, {CallE, ReturnE}); + flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, {CallE, ReturnE, JumpE, BranchE}, {CallM, ReturnM, JumpM, BranchM}); + flopenrc #(4) InstrClassRegW(clk, reset, FlushM, ~StallW, {CallM, ReturnM, JumpM, BranchM}, {CallW, ReturnW, JumpW, BranchW}); + + // branch predictor + flopenrc #(1) BPClassWrongRegM(clk, reset, FlushM, ~StallM, IClassWrongE, IClassWrongM); + flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, IClassWrongD, IClassWrongE); + + // pipeline the predicted class + flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, {BPCallF, BPReturnF, BPJumpF, BPBranchF}, {BPCallD, BPReturnD, BPJumpD, BPBranchD}); + + // branch class prediction wrong. + assign IClassWrongD = |({BPCallD, BPReturnD, BPJumpD, BPBranchD} ^ {CallD, ReturnD, JumpD, BranchD}); + assign BPReturnWrongD = BPReturnD ^ ReturnD; + +endmodule diff --git a/src/ifu/bpred/twoBitPredictor.sv b/src/ifu/bpred/twoBitPredictor.sv index 4a7be674d..7011a0580 100644 --- a/src/ifu/bpred/twoBitPredictor.sv +++ b/src/ifu/bpred/twoBitPredictor.sv @@ -31,20 +31,20 @@ module twoBitPredictor #(parameter k = 10) ( input logic clk, input logic reset, - input logic StallF, StallD, StallE, StallM, - input logic FlushD, FlushE, FlushM, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCM, - output logic [1:0] DirPredictionF, - output logic DirPredictionWrongE, - input logic BranchInstrE, BranchInstrM, + output logic [1:0] BPDirPredF, + output logic BPDirPredWrongE, + input logic BranchE, BranchM, input logic PCSrcE ); logic [k-1:0] IndexNextF, IndexM; logic [1:0] PredictionMemory; logic DoForwarding, DoForwardingF; - logic [1:0] DirPredictionD, DirPredictionE; - logic [1:0] NewDirPredictionE, NewDirPredictionM; + logic [1:0] BPDirPredD, BPDirPredE; + logic [1:0] NewBPDirPredE, NewBPDirPredM; // hashing function for indexing the PC // We have k bits to index, but XLEN bits as the input. @@ -55,21 +55,21 @@ module twoBitPredictor #(parameter k = 10) ( ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), - .ce1(~StallF), .ce2(~StallM & ~FlushM), + .ce1(~StallF), .ce2(~StallW & ~FlushW), .ra1(IndexNextF), - .rd1(DirPredictionF), + .rd1(BPDirPredF), .wa2(IndexM), - .wd2(NewDirPredictionM), - .we2(BranchInstrM & ~StallM & ~FlushM), + .wd2(NewBPDirPredM), + .we2(BranchM), .bwe2(1'b1)); - flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD); - flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, DirPredictionD, DirPredictionE); + flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, BPDirPredF, BPDirPredD); + flopenrc #(2) PredictionRegE(clk, reset, FlushE, ~StallE, BPDirPredD, BPDirPredE); - assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE; + assign BPDirPredWrongE = PCSrcE != BPDirPredE[1] & BranchE; - satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); - flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewDirPredictionE, NewDirPredictionM); + satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(BPDirPredE), .NewState(NewBPDirPredE)); + flopenrc #(2) NewPredictionRegM(clk, reset, FlushM, ~StallM, NewBPDirPredE, NewBPDirPredM); endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 71221ef63..2c2ee7b4e 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -54,22 +54,22 @@ module ifu ( input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address output logic [`XLEN-1:0] PCE, // Execution stage instruction address - output logic BPPredWrongE, // Prediction is wrong - output logic BPPredWrongM, // Prediction is wrong + output logic BPWrongE, // Prediction is wrong + output logic BPWrongM, // Prediction is wrong // Mem output logic CommittedF, // I$ or bus memory operation started, delay interrupts input logic [`XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes. - output logic [`XLEN-1:0] PCNext2F, // Selected PC between branch prediction and next valid PC if CSRWriteFence + output logic [`XLEN-1:0] PC2NextF, // Selected PC between branch prediction and next valid PC if CSRWriteFence output logic [31:0] InstrD, // The decoded instruction in Decode stage output logic [31:0] InstrM, // The decoded instruction in Memory stage output logic [`XLEN-1:0] PCM, // Memory stage instruction address // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - output logic JumpOrTakenBranchM, - output logic DirPredictionWrongM, // Prediction direction is wrong - output logic BTBPredPCWrongM, // Prediction target wrong + output logic BPDirPredWrongM, // Prediction direction is wrong + output logic BTAWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic PredictionInstrClassWrongM, // Class prediction is wrong + output logic IClassWrongM, // Class prediction is wrong + output logic ICacheStallF, // I$ busy with multicycle operation // Faults input logic IllegalBaseInstrD, // Illegal non-compressed instruction input logic IllegalFPUInstrD, // Illegal FP instruction @@ -88,7 +88,7 @@ module ifu ( input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - output logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit output logic InstrAccessFaultF, // Instruction access fault @@ -128,11 +128,10 @@ module ifu ( logic CacheableF; // PMA indicates instruction address is cacheable logic SelNextSpillF; // In a spill, stall pipeline and gate local stallF logic BusStall; // Bus interface busy with multicycle operation - logic ICacheStallF; // I$ busy with multicycle operation logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill // branch predictor signal - logic [`XLEN-1:0] PCNext1F; // Branch predictor next PCF + logic [`XLEN-1:0] PC1NextF; // Branch predictor next PCF logic BusCommittedF; // Bus memory operation in flight, delay interrupts logic CacheCommittedF; // I$ memory operation started, delay interrupts logic SelIROM; // PMA indicates instruction address is in the IROM @@ -145,7 +144,7 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill assign PCNextFSpill = PCNextF; assign PCFSpill = PCF; @@ -185,12 +184,12 @@ module ifu ( .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), - .DAPageFault(InstrDAPageFaultF), + .UpdateDA(InstrUpdateDAF), .AtomicAccessM(1'b0),.ExecuteAccessF(1'b1), .WriteAccessM(1'b0), .ReadAccessM(1'b0), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); end else begin - assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrDAPageFaultF} = '0; + assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrUpdateDAF} = '0; assign PCPF = PCFExt[`PA_BITS-1:0]; assign CacheableF = '1; assign SelIROM = '0; @@ -297,8 +296,8 @@ module ifu ( //////////////////////////////////////////////////////////////////////////////////////////////// if(`ZICSR_SUPPORTED | `ZIFENCEI_SUPPORTED) - mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PCNext2F)); - else assign PCNext2F = PCNext1F; + mux2 #(`XLEN) pcmux2(.d0(PC1NextF), .d1(NextValidPCE), .s(CSRWriteFenceM),.y(PC2NextF)); + else assign PC2NextF = PC1NextF; assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); @@ -330,14 +329,14 @@ module ifu ( .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .BranchD, .BranchE, .JumpD, .JumpE, - .InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, - .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM); + .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, + .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .BPWrongM, + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred - mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PCNext1F)); - assign BPPredWrongE = PCSrcE; - assign {InstrClassM, DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; + mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); + assign BPWrongE = PCSrcE; + assign {InstrClassM, BPDirPredWrongM, BTAWrongM, RASPredPCWrongM, IClassWrongM} = '0; assign NextValidPCE = PCE; end diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 4bb677cab..4b89a3cef 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -42,7 +42,7 @@ module spill #( input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic ITLBMissF, // ITLB miss, ignore memory request - input logic InstrDAPageFaultF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) + input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline @@ -77,7 +77,7 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; - assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF)); + assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF)); always_ff @(posedge clk) if (reset | FlushD) CurrState <= #1 STATE_READY; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e01de3128..9f11f7007 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -54,6 +54,7 @@ module lsu ( input logic [1:0] PrivilegeModeW, // Current privilege mode input logic BigEndianM, // Swap byte order to big endian input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries + output logic DCacheStallM, // D$ busy with multicycle operation // fpu input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU input logic FpLoadStoreM, // Selects FPU as store for write data @@ -81,7 +82,7 @@ module lsu ( input logic [1:0] STATUS_MPP, // Machine previous privilege mode input logic [`XLEN-1:0] PCFSpill, // Fetch PC input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB output logic [1:0] PageType, // Type of page table entry to write to ITLB output logic ITLBWriteF, // Write PTE to ITLB @@ -103,7 +104,6 @@ module lsu ( logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1 - logic DCacheStallM; // D$ busy with multicycle operation logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation @@ -127,7 +127,7 @@ module lsu ( logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB - logic DataDAPageFaultM; // DTLB hit needs to update dirty or access bits + logic DataUpdateDAM; // DTLB hit needs to update dirty or access bits logic LSULoadAccessFaultM; // Load acces fault logic LSUStoreAmoAccessFaultM; // Store access fault logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle @@ -151,7 +151,7 @@ module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, - .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, + .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, .FlushW, .DCacheStallM, .SATP_REGW, .PCFSpill, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN @@ -196,7 +196,7 @@ module lsu ( .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM, .StoreAmoPageFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. - .DAPageFault(DataDAPageFaultM), + .UpdateDA(DataUpdateDAM), .AtomicAccessM(|LSUAtomicM), .ExecuteAccessF(1'b0), .WriteAccessM(PreLSURWM[0]), .ReadAccessM(PreLSURWM[1]), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); diff --git a/src/mdu/mdu.sv b/src/mdu/mdu.sv index 4a85bf478..b62add60d 100644 --- a/src/mdu/mdu.sv +++ b/src/mdu/mdu.sv @@ -51,16 +51,18 @@ module mdu( // Divider // Start a divide when a new division instruction is received and the divider isn't already busy or finishing // When IDIV_ON_FPU is set, use the FPU divider instead - if (`IDIV_ON_FPU) begin + // In ZMMUL, with M_SUPPORTED = 0, omit the divider + if ((`IDIV_ON_FPU) || (!`M_SUPPORTED)) begin:nodiv assign QuotM = 0; assign RemM = 0; assign DivBusyE = 0; - end else begin + end else begin:div intdivrestoring div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE, .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); end // Result multiplexer + // For ZMMUL, QuotM and RemM are tied to 0, so the mux automatically simplifies always_comb case (Funct3M) 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; // mul diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index 679bd185d..b1832d8c9 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -38,8 +38,8 @@ module adrdecs ( localparam logic [3:0] SUPPORTED_SIZE = (`LLEN == 32 ? 4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed - adrdec dtimdec(PhysicalAddress, `DTIM_BASE, `DTIM_RANGE, `DTIM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[10]); - adrdec iromdec(PhysicalAddress, `IROM_BASE, `IROM_RANGE, `IROM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[9]); + adrdec dtimdec(PhysicalAddress, `DTIM_BASE, `DTIM_RANGE, `DTIM_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[10]); + adrdec iromdec(PhysicalAddress, `IROM_BASE, `IROM_RANGE, `IROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[9]); adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[8]); adrdec bootromdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec uncoreramdec(PhysicalAddress, `UNCORE_RAM_BASE, `UNCORE_RAM_RANGE, `UNCORE_RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[6]); diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index c0b7ad933..f2df8ea92 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -49,8 +49,8 @@ module hptw ( input logic ITLBMissF, input logic DTLBMissM, input logic FlushW, - input logic InstrDAPageFaultF, - input logic DataDAPageFaultM, + input logic InstrUpdateDAF, + input logic DataUpdateDAM, output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry @@ -87,21 +87,23 @@ module hptw ( logic [`XLEN-1:0] TranslationVAdr; logic [`XLEN-1:0] NextPTE; logic UpdatePTE; - logic DAPageFault; + logic HPTWUpdateDA; logic [`PA_BITS-1:0] HPTWReadAdr; logic SelHPTWAdr; logic [`XLEN+1:0] HPTWAdrExt; logic ITLBMissOrDAFaultF; logic DTLBMissOrDAFaultM; + logic LSUAccessFaultM; logic [`PA_BITS-1:0] HPTWAdr; logic [1:0] HPTWRW; logic [2:0] HPTWSize; // 32 or 64 bit access statetype WalkerState, NextWalkerState, InitialWalkerState; // map hptw access faults onto either the original LSU load/store fault or instruction access fault - assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[1] & ~MemRWM[0]; - assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : (LSULoadAccessFaultM | LSUStoreAmoAccessFaultM) & DTLBWalk & MemRWM[0]; - assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: (LSUStoreAmoAccessFaultM | LSULoadAccessFaultM) & ~DTLBWalk; + assign LSUAccessFaultM = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM; + assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0]; + assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[0]; + assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: LSUAccessFaultM & ~DTLBWalk; // Extract bits from CSRs and inputs assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; @@ -125,10 +127,10 @@ module hptw ( assign ValidLeafPTE = ValidPTE & LeafPTE; assign ValidNonLeafPTE = ValidPTE & ~LeafPTE; - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites + if(`SVADU_SUPPORTED) begin : hptwwrites logic ReadAccess, WriteAccess; - logic InvalidRead, InvalidWrite; - logic UpperBitsUnequalPageFault; + logic InvalidRead, InvalidWrite, InvalidOp; + logic UpperBitsUnequal; logic OtherPageFault; logic [1:0] EffectivePrivilegeMode; logic ImproperPrivilege; @@ -147,7 +149,7 @@ module hptw ( mux2 #(`PA_BITS) HPTWWriteAdrMux(HPTWReadAdr, HPTWWriteAdr, SelHPTWWriteAdr, HPTWAdr); assign {Dirty, Accessed} = PTE[7:6]; - assign WriteAccess = MemRWM[0] | (|AtomicM); + assign WriteAccess = MemRWM[0]; // implies | (|AtomicM); assign SetDirty = ~Dirty & DTLBWalk & WriteAccess; assign ReadAccess = MemRWM[1]; @@ -157,24 +159,24 @@ module hptw ( // Check for page faults vm64check vm64check(.SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .VAdr(TranslationVAdr), - .SV39Mode(), .UpperBitsUnequalPageFault); + .SV39Mode(), .UpperBitsUnequal); assign InvalidRead = ReadAccess & ~Readable & (~STATUS_MXR | ~Executable); assign InvalidWrite = WriteAccess & ~Writable; - assign OtherPageFault = DTLBWalk? ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~Valid : - ImproperPrivilege | ~Executable | UpperBitsUnequalPageFault | Misaligned | ~Valid; + assign InvalidOp = DTLBWalk ? (InvalidRead | InvalidWrite) : ~Executable; + assign OtherPageFault = ImproperPrivilege | InvalidOp | UpperBitsUnequal | Misaligned | ~Valid; // hptw needs to know if there is a Dirty or Access fault occuring on this // memory access. If there is the PTE needs to be updated seting Access // and possibly also Dirty. Dirty is set if the operation is a store/amo. // However any other fault should not cause the update. - assign DAPageFault = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; + assign HPTWUpdateDA = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; assign HPTWRW[0] = (WalkerState == UPDATE_PTE); - assign UpdatePTE = (WalkerState == LEAF) & DAPageFault; + assign UpdatePTE = (WalkerState == LEAF) & HPTWUpdateDA; end else begin // block: hptwwrites assign NextPTE = ReadDataM; assign HPTWAdr = HPTWReadAdr; - assign DAPageFault = '0; + assign HPTWUpdateDA = '0; assign UpdatePTE = '0; assign HPTWRW[0] = '0; end @@ -182,8 +184,8 @@ module hptw ( // Enable and select signals based on states assign StartWalk = (WalkerState == IDLE) & TLBMiss; assign HPTWRW[1] = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); - assign DTLBWriteM = (WalkerState == LEAF & ~DAPageFault) & DTLBWalk; - assign ITLBWriteF = (WalkerState == LEAF & ~DAPageFault) & ~DTLBWalk; + assign DTLBWriteM = (WalkerState == LEAF & ~HPTWUpdateDA) & DTLBWalk; + assign ITLBWriteF = (WalkerState == LEAF & ~HPTWUpdateDA) & ~DTLBWalk; // FSM to track PageType based on the levels of the page table traversed flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); @@ -262,7 +264,7 @@ module hptw ( else NextWalkerState = LEAF; L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - LEAF: if (`HPTW_WRITES_SUPPORTED & DAPageFault) NextWalkerState = UPDATE_PTE; + LEAF: if (`SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; @@ -273,8 +275,8 @@ module hptw ( assign SelHPTW = WalkerState != IDLE; assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); - assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); - assign DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); + assign ITLBMissOrDAFaultF = ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF); + assign DTLBMissOrDAFaultM = DTLBMissM | (`SVADU_SUPPORTED & DataUpdateDAM); // HTPW address/data/control muxing @@ -291,7 +293,7 @@ module hptw ( mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, IHAdrM); - if(`HPTW_WRITES_SUPPORTED) + if(`SVADU_SUPPORTED) mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IHWriteDataM); else assign IHWriteDataM = WriteDataM; diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 5b5248161..e3cd8031e 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -51,7 +51,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, // access fault sources output logic InstrPageFaultF, LoadPageFaultM, StoreAmoPageFaultM, // page fault sources - output logic DAPageFault, // page fault due to setting dirty or access bit + output logic UpdateDA, // page fault due to setting dirty or access bit output logic LoadMisalignedFaultM, StoreAmoMisalignedFaultM, // misaligned fault sources // PMA checker signals input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // access type @@ -70,6 +70,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( logic Translate; // Translation occurs when virtual memory is active and DisableTranslation is off logic TLBHit; // Hit in TLB logic TLBPageFault; // Page fault from TLB + logic ReadNoAmoAccessM; // Read that is not part of atomic operation causes Load faults. Otherwise StoreAmo faults // only instantiate TLB if Virtual Memory is supported if (`VIRTMEM_SUPPORTED) begin:tlb @@ -84,7 +85,7 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, - .Translate, .TLBPageFault, .DAPageFault); + .Translate, .TLBPageFault, .UpdateDA); end else begin:tlb// just pass address through as physical assign Translate = 0; assign TLBMiss = 0; @@ -118,11 +119,13 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( assign PMPLoadAccessFaultM = 0; end + assign ReadNoAmoAccessM = ReadAccessM & ~WriteAccessM;// AMO causes StoreAmo rather than Load fault + // Access faults // If TLB miss and translating we want to not have faults from the PMA and PMP checkers. - assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~(Translate & ~TLBHit); - assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~(Translate & ~TLBHit); - assign StoreAmoAccessFaultM = (PMAStoreAmoAccessFaultM | PMPStoreAmoAccessFaultM) & ~(Translate & ~TLBHit); + assign InstrAccessFaultF = (PMAInstrAccessFaultF | PMPInstrAccessFaultF) & ~TLBMiss; + assign LoadAccessFaultM = (PMALoadAccessFaultM | PMPLoadAccessFaultM) & ~TLBMiss; + assign StoreAmoAccessFaultM = (PMAStoreAmoAccessFaultM | PMPStoreAmoAccessFaultM) & ~TLBMiss; // Misaligned faults always_comb @@ -132,11 +135,11 @@ module mmu #(parameter TLB_ENTRIES = 8, IMMU = 0) ( 2'b10: DataMisalignedM = VAdr[1] | VAdr[0]; // lw, sw, flw, fsw, lwu 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase - assign LoadMisalignedFaultM = DataMisalignedM & ReadAccessM; - assign StoreAmoMisalignedFaultM = DataMisalignedM & (WriteAccessM | AtomicAccessM); + assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM; + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM; // Specify which type of page fault is occurring assign InstrPageFaultF = TLBPageFault & ExecuteAccessF; - assign LoadPageFaultM = TLBPageFault & ReadAccessM; - assign StoreAmoPageFaultM = TLBPageFault & (WriteAccessM | AtomicAccessM); + assign LoadPageFaultM = TLBPageFault & ReadNoAmoAccessM; + assign StoreAmoPageFaultM = TLBPageFault & WriteAccessM; endmodule diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index e46cfe8b9..909cc564d 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -58,8 +58,12 @@ module pmachecker ( // Only non-core RAM/ROM memory regions are cacheable assign Cacheable = SelRegions[8] | SelRegions[7] | SelRegions[6]; - assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; - assign AtomicAllowed = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; + // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly + // I/O is nonidempotent. + assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6]; + // Atomic operations are only allowed on RAM + assign AtomicAllowed = SelRegions[10] | SelRegions[8] | SelRegions[6]; + // Check if tightly integrated memories are selected assign SelTIM = SelRegions[10] | SelRegions[9]; // Detect access faults diff --git a/src/mmu/pmpadrdec.sv b/src/mmu/pmpadrdec.sv index 4aeea116a..5f666eeaf 100644 --- a/src/mmu/pmpadrdec.sv +++ b/src/mmu/pmpadrdec.sv @@ -85,5 +85,9 @@ module pmpadrdec ( assign W = PMPCfg[1]; assign R = PMPCfg[0]; assign Active = |PMPCfg[4:3]; + + // known bug: The size of the access is not yet checked. For example, if an NA4 entry matches 0xC-0xF and the system + // attempts an 8-byte access to 0x8, the access should fail (see page 60 of privileged specification 20211203). This + // implementation will not detect the failure. endmodule diff --git a/src/mmu/tlb.sv b/src/mmu/tlb/tlb.sv similarity index 98% rename from src/mmu/tlb.sv rename to src/mmu/tlb/tlb.sv index f8bf0d178..7d6cd317f 100644 --- a/src/mmu/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -72,7 +72,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( output logic TLBHit, output logic Translate, output logic TLBPageFault, - output logic DAPageFault + output logic UpdateDA ); logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex @@ -105,7 +105,7 @@ module tlb #(parameter TLB_ENTRIES = 8, ITLB = 0) ( tlbcontrol #(ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, - .DAPageFault, .SV39Mode, .Translate); + .UpdateDA, .SV39Mode, .Translate); tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) diff --git a/src/mmu/tlbcam.sv b/src/mmu/tlb/tlbcam.sv similarity index 100% rename from src/mmu/tlbcam.sv rename to src/mmu/tlb/tlbcam.sv diff --git a/src/mmu/tlbcamline.sv b/src/mmu/tlb/tlbcamline.sv similarity index 100% rename from src/mmu/tlbcamline.sv rename to src/mmu/tlb/tlbcamline.sv diff --git a/src/mmu/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv similarity index 81% rename from src/mmu/tlbcontrol.sv rename to src/mmu/tlb/tlbcontrol.sv index abbdba8f6..9754124da 100644 --- a/src/mmu/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -43,7 +43,7 @@ module tlbcontrol #(parameter ITLB = 0) ( output logic TLBMiss, output logic TLBHit, output logic TLBPageFault, - output logic DAPageFault, + output logic UpdateDA, output logic SV39Mode, output logic Translate ); @@ -52,7 +52,7 @@ module tlbcontrol #(parameter ITLB = 0) ( logic [1:0] EffectivePrivilegeMode; logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R, PTE_V; // Useful PTE Control Bits - logic UpperBitsUnequalPageFault; + logic UpperBitsUnequal; logic TLBAccess; logic ImproperPrivilege; @@ -64,7 +64,7 @@ module tlbcontrol #(parameter ITLB = 0) ( assign TLBAccess = ReadAccess | WriteAccess; // Check that upper bits are legal (all 0s or all 1s) - vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequalPageFault); + vm64check vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); // unswizzle useful PTE bits assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; @@ -76,13 +76,13 @@ module tlbcontrol #(parameter ITLB = 0) ( // only execute non-user mode pages. assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) & ~PTE_U) | ((EffectivePrivilegeMode == `S_MODE) & PTE_U); - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & ~PTE_A & ~TLBPageFault; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + if(`SVADU_SUPPORTED) begin : hptwwrites + assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault; + assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V); end else begin // fault for software handling if access bit is off - assign DAPageFault = ~PTE_A; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | ~PTE_X | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + assign UpdateDA = ~PTE_A; + assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V); end end else begin:dtlb // Data TLB fault checking logic InvalidRead, InvalidWrite; @@ -98,16 +98,16 @@ module tlbcontrol #(parameter ITLB = 0) ( // Check for write error. Writes are invalid when the page's write bit is // low. assign InvalidWrite = WriteAccess & ~PTE_W; - if(`HPTW_WRITES_SUPPORTED) begin : hptwwrites - assign DAPageFault = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + if(`SVADU_SUPPORTED) begin : hptwwrites + assign UpdateDA = Translate & TLBHit & (~PTE_A | WriteAccess & ~PTE_D) & ~TLBPageFault; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpperBitsUnequal | Misaligned | ~PTE_V)); end else begin // Fault for software handling if access bit is off or writing a page with dirty bit off - assign DAPageFault = ~PTE_A | WriteAccess & ~PTE_D; - assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | DAPageFault | UpperBitsUnequalPageFault | Misaligned | ~PTE_V)); + assign UpdateDA = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = (Translate & TLBHit & (ImproperPrivilege | InvalidRead | InvalidWrite | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V)); end end assign TLBHit = CAMHit & TLBAccess; - assign TLBMiss = (~CAMHit | TLBFlush) & Translate & TLBAccess; + assign TLBMiss = ~CAMHit & TLBAccess & Translate ; endmodule diff --git a/src/mmu/tlblru.sv b/src/mmu/tlb/tlblru.sv similarity index 100% rename from src/mmu/tlblru.sv rename to src/mmu/tlb/tlblru.sv diff --git a/src/mmu/tlbmixer.sv b/src/mmu/tlb/tlbmixer.sv similarity index 100% rename from src/mmu/tlbmixer.sv rename to src/mmu/tlb/tlbmixer.sv diff --git a/src/mmu/tlbram.sv b/src/mmu/tlb/tlbram.sv similarity index 100% rename from src/mmu/tlbram.sv rename to src/mmu/tlb/tlbram.sv diff --git a/src/mmu/tlbramline.sv b/src/mmu/tlb/tlbramline.sv similarity index 100% rename from src/mmu/tlbramline.sv rename to src/mmu/tlb/tlbramline.sv diff --git a/src/mmu/vm64check.sv b/src/mmu/tlb/vm64check.sv similarity index 90% rename from src/mmu/vm64check.sv rename to src/mmu/tlb/vm64check.sv index a78b853e1..5f12eef7a 100644 --- a/src/mmu/vm64check.sv +++ b/src/mmu/tlb/vm64check.sv @@ -32,7 +32,7 @@ module vm64check ( input logic [`SVMODE_BITS-1:0] SATP_MODE, input logic [`XLEN-1:0] VAdr, output logic SV39Mode, - output logic UpperBitsUnequalPageFault + output logic UpperBitsUnequal ); if (`XLEN == 64) begin @@ -42,9 +42,9 @@ module vm64check ( logic eq_63_47, eq_46_38; assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]); assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]); - assign UpperBitsUnequalPageFault = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47; + assign UpperBitsUnequal = SV39Mode ? ~(eq_63_47 & eq_46_38) : ~eq_63_47; end else begin assign SV39Mode = 0; - assign UpperBitsUnequalPageFault = 0; + assign UpperBitsUnequal = 0; end endmodule diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 7b765bae0..d97be53f6 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -37,13 +37,14 @@ module csr #(parameter input logic FlushM, FlushW, input logic StallE, StallM, StallW, input logic [31:0] InstrM, // current instruction - input logic [`XLEN-1:0] PCM, PCNext2F, // program counter, next PC going to trap/return logic + input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return logic input logic [`XLEN-1:0] SrcAM, IEUAdrM, // SrcA and memory address from IEU input logic CSRReadM, CSRWriteM, // read or write CSR input logic TrapM, // trap is occurring input logic mretM, sretM, wfiM, // return or WFI instruction input logic IntPendingM, // at least one interrupt is pending and could occur if enabled input logic InterruptM, // interrupt is occurring + input logic ExceptionM, // interrupt is occurring input logic MTimerInt, // timer interrupt input logic MExtInt, SExtInt, // external interrupt (from PLIC) input logic MSwInt, // software interrupt @@ -57,17 +58,23 @@ module csr #(parameter input logic SelHPTW, // hardware page table walker active, so base endianness on supervisor mode // inputs for performance counters input logic LoadStallD, - input logic DirPredictionWrongM, - input logic BTBPredPCWrongM, + input logic StoreStallD, + input logic ICacheStallF, + input logic DCacheStallM, + input logic BPDirPredWrongM, + input logic BTAWrongM, input logic RASPredPCWrongM, - input logic PredictionInstrClassWrongM, - input logic BPPredWrongM, // branch predictor is wrong + input logic IClassWrongM, + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic sfencevmaM, + input logic FenceM, + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy // outputs from CSRs output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, @@ -155,7 +162,7 @@ module csr #(parameter // A return sets the PC to MEPC or SEPC assign RetM = mretM | sretM; mux2 #(`XLEN) epcmux(SEPC_REGW, MEPC_REGW, mretM, EPC); - mux3 #(`XLEN) pcmux3(PCNext2F, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); + mux3 #(`XLEN) pcmux3(PC2NextF, EPC, TrapVectorM, {TrapM, RetM}, UnalignedPCNextF); /////////////////////////////////////////// // CSRWriteValM @@ -258,9 +265,10 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, - .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, .JumpOrTakenBranchM, .BPPredWrongM, - .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, + .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, + .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, + .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index d61835826..b4f89f188 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -43,20 +43,28 @@ module csrc #(parameter input logic clk, reset, input logic StallE, StallM, input logic FlushM, - input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, - input logic DirPredictionWrongM, - input logic BTBPredPCWrongM, + input logic InstrValidNotFlushedM, LoadStallD, StoreStallD, + input logic CSRMWriteM, CSRWriteM, + input logic BPDirPredWrongM, + input logic BTAWrongM, input logic RASPredPCWrongM, - input logic PredictionInstrClassWrongM, - input logic BPPredWrongM, // branch predictor is wrong + input logic IClassWrongM, + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic ICacheStallF, + input logic DCacheStallM, + input logic sfencevmaM, + input logic InterruptM, + input logic ExceptionM, + input logic FenceM, + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, input logic [63:0] MTIME_CLINT, @@ -68,6 +76,7 @@ module csrc #(parameter logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0]; logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0]; logic LoadStallE, LoadStallM; + logic StoreStallE, StoreStallM; logic [`COUNTERS-1:0] WriteHPMCOUNTERM; logic [`COUNTERS-1:0] CounterEvent; logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0]; @@ -75,8 +84,8 @@ module csrc #(parameter genvar i; // Interface signals - flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); // don't flush the load stall during a load stall. - flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); + flopenrc #(2) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d({StoreStallD, LoadStallD}), .q({StoreStallE, LoadStallE})); // don't flush the load stall during a load stall. + flopenrc #(2) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d({StoreStallE, LoadStallE}), .q({StoreStallM, LoadStallM})); // Determine when to increment each counter assign CounterEvent[0] = 1'b1; // MCYCLE always increments @@ -85,20 +94,29 @@ module csrc #(parameter if(`QEMU) begin: cevent // No other performance counters in QEMU assign CounterEvent[`COUNTERS-1:3] = 0; end else begin: cevent // User-defined counters - assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[4] = DirPredictionWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction - assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction - assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target - assign CounterEvent[7] = JumpOrTakenBranchM & InstrValidNotFlushedM; // jump or taken branch instructions - assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address - assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions - assign CounterEvent[10] = PredictionInstrClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong - assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; // data cache access - assign CounterEvent[12] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[13] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access - assign CounterEvent[14] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = BPPredWrongM & InstrValidNotFlushedM; // branch predictor wrong - assign CounterEvent[`COUNTERS-1:16] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions + assign CounterEvent[3] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction + assign CounterEvent[4] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions + assign CounterEvent[5] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions + assign CounterEvent[6] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong + assign CounterEvent[7] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction + assign CounterEvent[8] = BTAWrongM & InstrValidNotFlushedM; // branch predictor wrong target + assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address + assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong + assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. + assign CounterEvent[12] = StoreStallM & InstrValidNotFlushedM; // Store Stall + assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access + assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss + assign CounterEvent[15] = DCacheStallM; // d cache miss cycles + assign CounterEvent[16] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access + assign CounterEvent[17] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss + assign CounterEvent[18] = ICacheStallF; // i cache miss cycles + assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes + assign CounterEvent[20] = FenceM & InstrValidNotFlushedM; // fence.i + assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma + assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low + assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be low + assign CounterEvent[24] = DivBusyE | FDivBusyE; // division cycles *** RT: might need to be delay until the next cycle + assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end // Counter update and write logic diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 99c56ad1b..253d02457 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -86,8 +86,8 @@ module csrs #(parameter assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)) & InstrValidNotFlushedM; assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM) & InstrValidNotFlushedM; assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN) & InstrValidNotFlushedM; - assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & MCOUNTEREN_TM & InstrValidNotFlushedM; - assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & MCOUNTEREN_TM & (`XLEN == 32) & InstrValidNotFlushedM; + assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & InstrValidNotFlushedM; + assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & (`XLEN == 32) & InstrValidNotFlushedM; // CSRs flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); @@ -100,12 +100,14 @@ module csrs #(parameter else assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported flopens #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], SCOUNTEREN_REGW); - if (`XLEN == 64) - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); - else begin - flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); - flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); - end + if (`SSTC_SUPPORTED) begin + if (`XLEN == 64) + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW); + else begin + flopenr #(`XLEN) STIMECMPreg(clk, reset, WriteSTIMECMPM, CSRWriteValM, STIMECMP_REGW[31:0]); + flopenr #(`XLEN) STIMECMPHreg(clk, reset, WriteSTIMECMPHM, CSRWriteValM, STIMECMP_REGW[63:32]); + end + end else assign STIMECMP_REGW = 0; // Supervisor timer interrupt logic // Spec is a bit peculiar - Machine timer interrupts are produced in CLINT, while Supervisor timer interrupts are in CSRs @@ -132,12 +134,12 @@ module csrs #(parameter if (PrivilegeModeW == `S_MODE & STATUS_TVM) IllegalCSRSAccessM = 1; end SCOUNTEREN:CSRSReadValM = {{(`XLEN-32){1'b0}}, SCOUNTEREN_REGW}; - STIMECMP: if (MCOUNTEREN_TM) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; + STIMECMP: if (`SSTC_SUPPORTED & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM = STIMECMP_REGW[`XLEN-1:0]; else begin CSRSReadValM = 0; IllegalCSRSAccessM = 1; end - STIMECMPH: if (MCOUNTEREN_TM & (`XLEN == 32)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; + STIMECMPH: if (`SSTC_SUPPORTED & (`XLEN == 32) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM)) CSRSReadValM[31:0] = STIMECMP_REGW[63:32]; else begin // not supported for RV64 CSRSReadValM = 0; IllegalCSRSAccessM = 1; diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 300da8a65..251dbb3d6 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -38,7 +38,7 @@ module privileged ( input logic [`XLEN-1:0] SrcAM, // GPR register to write input logic [31:0] InstrM, // Instruction input logic [`XLEN-1:0] IEUAdrM, // address from IEU - input logic [`XLEN-1:0] PCM, PCNext2F, // program counter, next PC going to trap/return PC logic + input logic [`XLEN-1:0] PCM, PC2NextF, // program counter, next PC going to trap/return PC logic // control signals input logic InstrValidM, // Current instruction is valid (not flushed) input logic CommittedM, CommittedF, // current instruction is using bus; don't interrupt @@ -46,17 +46,21 @@ module privileged ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic DirPredictionWrongM, // branch predictor guessed wrong directoin - input logic BTBPredPCWrongM, // branch predictor guessed wrong target - input logic RASPredPCWrongM, // return adddress stack guessed wrong target - input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class - input logic BPPredWrongM, // branch predictor is wrong + input logic StoreStallD, // store instruction is stalling + input logic ICacheStallF, // I cache stalled + input logic DCacheStallM, // D cache stalled + input logic BPDirPredWrongM, // branch predictor guessed wrong direction + input logic BTAWrongM, // branch predictor guessed wrong target + input logic RASPredPCWrongM, // return adddress stack guessed wrong target + input logic IClassWrongM, // branch predictor guessed wrong instruction class + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, // data cache miss input logic DCacheAccess, // data cache accessed (hit or miss) input logic ICacheMiss, // instruction cache miss input logic ICacheAccess, // instruction cache access + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy // fault sources input logic InstrAccessFaultF, // instruction access fault input logic LoadAccessFaultM, StoreAmoAccessFaultM, // load or store access fault @@ -84,6 +88,7 @@ module privileged ( // control outputs output logic RetM, TrapM, // return instruction, or trap output logic sfencevmaM, // sfence.vma instruction + input logic FenceM, // fence instruction output logic BigEndianM, // Use big endian in current privilege mode // Fault outputs output logic BreakpointFaultM, EcallFaultM, // breakpoint and Ecall traps should retire @@ -106,9 +111,9 @@ module privileged ( logic DelegateM; // trap should be delegated logic wfiM; // wait for interrupt instruction logic IntPendingM; // interrupt is pending, even if not enabled. ends wfi - logic InterruptM; // interrupt occuring - - + logic InterruptM; // interrupt occuring + logic ExceptionM; // Memory stage instruction caused a fault + // track the current privilege level privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW); @@ -121,12 +126,13 @@ module privileged ( // Control and Status Registers csr csr(.clk, .reset, .FlushM, .FlushW, .StallE, .StallM, .StallW, - .InstrM, .PCM, .SrcAM, .IEUAdrM, .PCNext2F, + .InstrM, .PCM, .SrcAM, .IEUAdrM, .PC2NextF, .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, - .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, - .DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredWrongM, - .PredictionInstrClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, + .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .BPWrongM, + .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, + .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS, @@ -149,7 +155,7 @@ module privileged ( .mretM, .sretM, .PrivilegeModeW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW, .STATUS_MIE, .STATUS_SIE, .InstrValidM, .CommittedM, .CommittedF, - .TrapM, .RetM, .wfiM, .InterruptM, .IntPendingM, .DelegateM, .WFIStallM, .CauseM); + .TrapM, .RetM, .wfiM, .InterruptM, .ExceptionM, .IntPendingM, .DelegateM, .WFIStallM, .CauseM); endmodule diff --git a/src/privileged/trap.sv b/src/privileged/trap.sv index d8ad28f56..1d98763fc 100644 --- a/src/privileged/trap.sv +++ b/src/privileged/trap.sv @@ -45,6 +45,7 @@ module trap ( output logic TrapM, // Trap is occurring output logic RetM, // Return instruction being executed output logic InterruptM, // Interrupt is occurring + output logic ExceptionM, // exception is occurring output logic IntPendingM, // Interrupt is pending, might occur if enabled output logic DelegateM, // Delegate trap to supervisor handler output logic WFIStallM, // Stall due to WFI instruction @@ -52,7 +53,6 @@ module trap ( ); logic MIntGlobalEnM, SIntGlobalEnM; // Global interupt enables - logic ExceptionM; // exception is occurring logic Committed; // LSU or IFU has committed to a bus operation that can't be interrupted logic BothInstrAccessFaultM; // instruction or HPTW ITLB fill caused an Instruction Access Fault logic [11:0] PendingIntsM, ValidIntsM, EnabledIntsM; // interrupts are pending, valid, or enabled diff --git a/src/wally/cvw.sv b/src/wally/cvw.sv index c4541b698..eb6146283 100644 --- a/src/wally/cvw.sv +++ b/src/wally/cvw.sv @@ -101,7 +101,7 @@ package cvw; parameter BPRED_SUPPORTED = `BPRED_SUPPORTED; parameter BPRED_TYPE = `BPRED_TYPE; parameter BPRED_SIZE = `BPRED_SIZE; - parameter HPTW_WRITES_SUPPORTED = `HPTW_WRITES_SUPPORTED; + parameter SVADU_SUPPORTED = `SVADU_SUPPORTED; // parameter = `; diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 02074f973..6c2d5816b 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -66,7 +66,7 @@ module wallypipelinedcore ( logic [`XLEN-1:0] PCFSpill, PCE, PCLinkE; logic [`XLEN-1:0] PCM; logic [`XLEN-1:0] CSRReadValW, MDUResultW; - logic [`XLEN-1:0] UnalignedPCNextF, PCNext2F; + logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF; logic [1:0] MemRWM; logic InstrValidD, InstrValidE, InstrValidM; logic InstrMisalignedFaultM; @@ -140,11 +140,11 @@ module wallypipelinedcore ( logic LSUHWRITE; logic LSUHREADY; - logic BPPredWrongE, BPPredWrongM; - logic DirPredictionWrongM; - logic BTBPredPCWrongM; + logic BPWrongE, BPWrongM; + logic BPDirPredWrongM; + logic BTAWrongM; logic RASPredPCWrongM; - logic PredictionInstrClassWrongM; + logic IClassWrongM; logic [3:0] InstrClassM; logic InstrAccessFaultF, HPTWInstrAccessFaultM; logic [2:0] LSUHSIZE; @@ -156,35 +156,36 @@ module wallypipelinedcore ( logic ICacheMiss; logic ICacheAccess; logic BreakpointFaultM, EcallFaultM; - logic InstrDAPageFaultF; + logic InstrUpdateDAF; logic BigEndianM; logic FCvtIntE; logic CommittedF; - logic JumpOrTakenBranchM; logic BranchD, BranchE, JumpD, JumpE; + logic FenceM; + logic DCacheStallM, ICacheStallF; // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidM, .InstrValidE, .InstrValidD, - .BranchD, .BranchE, .JumpD, .JumpE, + .BranchD, .BranchE, .JumpD, .JumpE, .ICacheStallF, // Fetch - .HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F, + .HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, .ICacheAccess, .ICacheMiss, // Execute - .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPPredWrongE, .BPPredWrongM, + .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPWrongE, .BPWrongM, // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, - .InstrD, .InstrM, .PCM, .InstrClassM, .DirPredictionWrongM, .JumpOrTakenBranchM, - .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM, + .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, + .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, // Faults out .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, // mmu management .PrivilegeModeW, .PTE, .PageType, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ITLBWriteF, .sfencevmaM, .ITLBMissF, // pmp/pma (inside mmu) signals. - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrDAPageFaultF); + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrUpdateDAF); // integer execution unit: integer register file, datapath and controller ieu ieu(.clk, .reset, @@ -208,7 +209,7 @@ module wallypipelinedcore ( // hazards .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD, .PCSrcE, - .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .StoreStallD); + .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .FenceM, .StoreStallD); lsu lsu( .clk, .reset, .StallM, .FlushM, .StallW, .FlushW, @@ -231,6 +232,7 @@ module wallypipelinedcore ( .STATUS_MPRV, // from csr .STATUS_MPP, // from csr .sfencevmaM, // connects to privilege + .DCacheStallM, // connects to privilege .LoadPageFaultM, // connects to privilege .StoreAmoPageFaultM, // connects to privilege .LoadMisalignedFaultM, // connects to privilege @@ -238,7 +240,7 @@ module wallypipelinedcore ( .HPTWInstrAccessFaultM, // connects to privilege .StoreAmoMisalignedFaultM, // connects to privilege .StoreAmoAccessFaultM, // connects to privilege - .InstrDAPageFaultF, + .InstrUpdateDAF, .PCFSpill, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, .LSUStallM); @@ -268,7 +270,7 @@ module wallypipelinedcore ( // global stall and flush control hazard hzu( - .BPPredWrongE, .CSRWriteFenceM, .RetM, .TrapM, + .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF, .FCvtIntStallD, .FPUStallD, @@ -284,14 +286,14 @@ module wallypipelinedcore ( privileged priv( .clk, .reset, .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, - .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PCNext2F, + .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, .InstrM, .CSRReadValW, .UnalignedPCNextF, - .RetM, .TrapM, .sfencevmaM, + .RetM, .TrapM, .sfencevmaM, .FenceM, .DCacheStallM, .ICacheStallF, .InstrValidM, .CommittedM, .CommittedF, - .FRegWriteM, .LoadStallD, - .DirPredictionWrongM, .BTBPredPCWrongM, .BPPredWrongM, - .RASPredPCWrongM, .PredictionInstrClassWrongM, - .InstrClassM, .JumpOrTakenBranchM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, + .FRegWriteM, .LoadStallD, .StoreStallD, + .BPDirPredWrongM, .BTAWrongM, .BPWrongM, + .RASPredPCWrongM, .IClassWrongM, .DivBusyE, .FDivBusyE, + .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, .InstrMisalignedFaultM, .IllegalIEUFPUInstrD, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, @@ -304,7 +306,7 @@ module wallypipelinedcore ( .FRM_REGW,.BreakpointFaultM, .EcallFaultM, .WFIStallM, .BigEndianM); end else begin assign CSRReadValW = 0; - assign UnalignedPCNextF = PCNext2F; + assign UnalignedPCNextF = PC2NextF; assign RetM = 0; assign TrapM = 0; assign WFIStallM = 0; @@ -313,7 +315,7 @@ module wallypipelinedcore ( end // multiply/divide unit - if (`M_SUPPORTED) begin:mdu + if (`M_SUPPORTED | `ZMMUL_SUPPORTED) begin:mdu mdu mdu(.clk, .reset, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, diff --git a/synthDC/Makefile b/synthDC/Makefile index 136e610d2..d29c11332 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -12,7 +12,7 @@ export MOD ?= orig # title to add a note in the synth's directory name TITLE = # tsmc28, sky130, and sky90 presently supported -export TECH ?= tsmc28 +export TECH ?= sky90 # MAXCORES allows parallel compilation, which is faster but less CPU-efficient # Avoid when doing sweeps of many optimization points in parallel export MAXCORES ?= 1 diff --git a/testbench/common/riscvassertions.sv b/testbench/common/riscvassertions.sv index f6cb4c6f6..f733aac58 100644 --- a/testbench/common/riscvassertions.sv +++ b/testbench/common/riscvassertions.sv @@ -23,40 +23,42 @@ module riscvassertions; initial begin - assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); - assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); - assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); - assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); - assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); - assert (`DCACHE_SUPPORTED | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); + $display("IDIV_ON_FPU = %b M_SUPPORTED %b comb %b\n", `IDIV_ON_FPU, `M_SUPPORTED, ((`IDIV_ON_FPU) || (!`M_SUPPORTED))); + assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); + assert (`S_SUPPORTED || `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support"); + assert (`IDIV_BITSPERCYCLE == 1 || `IDIV_BITSPERCYCLE==2 || `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4"); + assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)"); + assert (`D_SUPPORTED || ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)"); + assert (`F_SUPPORTED || ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)"); + assert (`DCACHE_SUPPORTED || ~`F_SUPPORTED || `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN"); assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); - assert (`FLEN<=`XLEN | `DCACHE_SUPPORTED | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); - assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`FLEN<=`XLEN || `DCACHE_SUPPORTED || `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); + assert (`DCACHE_WAYSIZEINBYTES <= 4096 || (!`DCACHE_SUPPORTED) || `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`DCACHE_LINELENINBITS >= 128 || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE_SUPPORTED) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 || (!`ICACHE_SUPPORTED) || `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_LINELENINBITS >= 32 || (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES || (!`DCACHE_SUPPORTED)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS || (!`ICACHE_SUPPORTED)) else $error("ICACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES || (!`ICACHE_SUPPORTED)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES || `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES || `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); - assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); - assert (`DCACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`ICACHE_SUPPORTED | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - assert ((`DCACHE_SUPPORTED == 0 & `ICACHE_SUPPORTED == 0) | `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); - assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); + assert (`ZICSR_SUPPORTED == 1 || (`PMP_ENTRIES == 0 && `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); + assert (`ZICSR_SUPPORTED == 1 || (`S_SUPPORTED == 0 && `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported"); + assert (`U_SUPPORTED || (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); + assert (`VIRTMEM_SUPPORTED == 0 || (`DTIM_SUPPORTED == 0 && `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); + assert (`DCACHE_SUPPORTED || `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); + assert (`ICACHE_SUPPORTED || `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); + assert ((`DCACHE_SUPPORTED == 0 && `ICACHE_SUPPORTED == 0) || `BUS_SUPPORTED) else $error("Dcache and Icache requires DBUS_SUPPORTED."); + assert (`DCACHE_LINELENINBITS <= `XLEN*16 || (!`DCACHE_SUPPORTED)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); - assert (`DCACHE_SUPPORTED | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); - assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); - assert (`SSTC_SUPPORTED == 0 | (`S_SUPPORTED)) else $error("SSTC requires S_SUPPORTED"); + assert (`DCACHE_SUPPORTED || (`A_SUPPORTED == 0)) else $error("Atomic extension (A) requires cache on Wally."); + assert (`IDIV_ON_FPU == 0 || `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); + assert (`SSTC_SUPPORTED == 0 || (`S_SUPPORTED)) else $error("SSTC requires S_SUPPORTED"); + assert ((`ZMMUL_SUPPORTED == 0) || (`M_SUPPORTED ==0)) else $error("At most one of ZMMUL_SUPPORTED and M_SUPPORTED can be enabled"); end endmodule diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index f26339007..442edec22 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -137,17 +137,24 @@ module testbench; .CMP_CSR (1) ) idv_trace2api(rvvi); + int PRIV_RWX = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE | RVVI_MEMORY_PRIVILEGE_EXEC; + int PRIV_RW = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE; + int PRIV_X = RVVI_MEMORY_PRIVILEGE_EXEC; + initial begin + MAX_ERRS = 3; // Initialize REF (do this before initializing the DUT) if (!rvviVersionCheck(RVVI_API_VERSION)) begin msgfatal($sformatf("%m @ t=%0t: Expecting RVVI API version %0d.", $time, RVVI_API_VERSION)); end - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org")); - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv")); - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GC")); - void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 39)); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org")); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv")); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GC")); + void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 39)); + void'(rvviRefConfigSetInt(IDV_CONFIG_MAX_NET_LATENCY_RETIREMENTS, 6)); + if (!rvviRefInit(elffilename)) begin msgfatal($sformatf("%m @ t=%0t: rvviRefInit failed", $time)); end @@ -158,6 +165,41 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hC02)); // INSTRET void'(rvviRefCsrSetVolatile(0, 32'hB02)); // MINSTRET void'(rvviRefCsrSetVolatile(0, 32'hC01)); // TIME + + // cannot predict this register due to latency between + // pending and taken + void'(rvviRefCsrSetVolatile(0, 32'h344)); // MIP + void'(rvviRefCsrSetVolatile(0, 32'h144)); // SIP + + // Memory lo, hi, priv (RVVI_MEMORY_PRIVILEGE_{READ,WRITE,EXEC}) + void'(rvviRefMemorySetPrivilege(56'h0, 56'h7fffffffff, 0)); + if (`BOOTROM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`BOOTROM_BASE, (`BOOTROM_BASE + `BOOTROM_RANGE), PRIV_X)); + if (`UNCORE_RAM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`UNCORE_RAM_BASE, (`UNCORE_RAM_BASE + `UNCORE_RAM_RANGE), PRIV_RWX)); + if (`EXT_MEM_SUPPORTED) + void'(rvviRefMemorySetPrivilege(`EXT_MEM_BASE, (`EXT_MEM_BASE + `EXT_MEM_RANGE), PRIV_RWX)); + + if (`CLINT_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE))); + end + if (`GPIO_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE))); + end + if (`UART_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`UART_BASE, (`UART_BASE + `UART_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`UART_BASE, (`UART_BASE + `UART_RANGE))); + end + if (`PLIC_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE))); + end + if (`SDC_SUPPORTED) begin + void'(rvviRefMemorySetPrivilege(`SDC_BASE, (`SDC_BASE + `SDC_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`SDC_BASE, (`SDC_BASE + `SDC_RANGE))); + end if(`XLEN==32) begin void'(rvviRefCsrSetVolatile(0, 32'hC80)); // CYCLEH @@ -166,16 +208,24 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH end - // Enable the trace2log module - if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin - msgnote($sformatf("%m @ t=%0t: TRACE2LOG_ENABLE is %0d", $time, TRACE2LOG_ENABLE)); - end + void'(rvviRefCsrSetVolatile(0, 32'h104)); // SIE - Temporary!!!! - if ($value$plusargs("TRACE2COV_ENABLE=%d", TRACE2COV_ENABLE)) begin - msgnote($sformatf("%m @ t=%0t: TRACE2COV_ENABLE is %0d", $time, TRACE2COV_ENABLE)); - end + // These should be done in the attached client +// // Enable the trace2log module +// if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin +// msgnote($sformatf("%m @ t=%0t: TRACE2LOG_ENABLE is %0d", $time, TRACE2LOG_ENABLE)); +// end +// +// if ($value$plusargs("TRACE2COV_ENABLE=%d", TRACE2COV_ENABLE)) begin +// msgnote($sformatf("%m @ t=%0t: TRACE2COV_ENABLE is %0d", $time, TRACE2COV_ENABLE)); +// end end + always @(dut.core.MTimerInt) void'(rvvi.net_push("MTimerInterrupt", dut.core.MTimerInt)); + always @(dut.core.MExtInt) void'(rvvi.net_push("MExternalInterrupt", dut.core.MExtInt)); + always @(dut.core.SExtInt) void'(rvvi.net_push("SExternalInterrupt", dut.core.SExtInt)); + always @(dut.core.MSwInt) void'(rvvi.net_push("MSWInterrupt", dut.core.MSwInt)); + final begin void'(rvviRefShutdown()); end