From 3d1ffac7d7c31353ac6fd3c5048b11a802501b8f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 1 Mar 2023 16:40:42 -0600 Subject: [PATCH 01/15] Cleaned up branch predictor performance counters. --- config/rv32gc/wally-config.vh | 2 +- sim/sim-imperas | 2 +- src/ifu/bpred/bpred.sv | 9 ++------- src/ifu/ifu.sv | 5 ++--- src/privileged/csr.sv | 3 +-- src/privileged/csrc.sv | 5 ++--- src/privileged/privileged.sv | 3 +-- src/wally/wallypipelinedcore.sv | 5 ++--- 8 files changed, 12 insertions(+), 22 deletions(-) diff --git a/config/rv32gc/wally-config.vh b/config/rv32gc/wally-config.vh index ac68e3ee..d7475cdb 100644 --- a/config/rv32gc/wally-config.vh +++ b/config/rv32gc/wally-config.vh @@ -134,7 +134,7 @@ `define BPRED_SUPPORTED 1 `define BPRED_TYPE "BP_GSHARE" // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT -`define BPRED_SIZE 10 +`define BPRED_SIZE 16 `define BTB_SIZE 10 `define SVADU_SUPPORTED 0 diff --git a/sim/sim-imperas b/sim/sim-imperas index b6a7f1c4..aa1dc3a0 100755 --- a/sim/sim-imperas +++ b/sim/sim-imperas @@ -29,4 +29,4 @@ IMPERAS_TOOLS=$(pwd)/imperas.ic \ OTHERFLAGS="+TRACE2LOG_ENABLE=1 VERBOSE=1" \ TESTDIR=${WALLY}/tests/riscof/work/wally-riscv-arch-test/rv64i_m/privilege/src/Lee.S/ \ -vsim -do "do wally-pipelined-imperas.do rv64gc" +vsim -do "do wally-imperas.do rv64gc" diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 92da8622..626da896 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -59,7 +59,6 @@ module bpred ( input logic [`XLEN-1:0] IEUAdrM, // The branch/jump target address input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br - output logic JumpOrTakenBranchM, // The valid instruction class. 1-hot encoded as call, return, jr (not return), j, br // Report branch prediction status output logic BPWrongE, // Prediction is wrong @@ -196,7 +195,6 @@ module bpred ( else assign NextValidPCE = PCE; if(`ZICOUNTERS_SUPPORTED) begin - logic JumpOrTakenBranchE; logic [`XLEN-1:0] RASPCD, RASPCE; logic BTBPredPCWrongE, RASPredPCWrongE; // performance counters @@ -209,13 +207,10 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. // By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. + // **** use BTAWrongM from BTB. assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; - assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; - - flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, @@ -223,7 +218,7 @@ module bpred ( {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); end else begin - assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; + assign {BTBPredPCWrongM, RASPredPCWrongM} = '0; end // **** Fix me diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index b049a956..78cd8c07 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -65,7 +65,6 @@ module ifu ( output logic [`XLEN-1:0] PCM, // Memory stage instruction address // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - output logic JumpOrTakenBranchM, output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong @@ -88,7 +87,7 @@ module ifu ( input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits + output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit output logic InstrAccessFaultF, // Instruction access fault @@ -331,7 +330,7 @@ module ifu ( .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE, .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, - .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPWrongM, + .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .BPWrongM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index a54c05b2..58d00d0a 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -63,7 +63,6 @@ module csr #(parameter input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, @@ -259,7 +258,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .JumpOrTakenBranchM, .BPWrongM, + .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index f2b4b0e7..6ace0de3 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -50,7 +50,6 @@ module csrc #(parameter input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, input logic DCacheAccess, input logic ICacheMiss, @@ -86,10 +85,10 @@ module csrc #(parameter assign CounterEvent[`COUNTERS-1:3] = 0; end else begin: cevent // User-defined counters assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction + assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target - assign CounterEvent[7] = JumpOrTakenBranchM & InstrValidNotFlushedM; // jump or taken branch instructions + assign CounterEvent[7] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index f7a3caad..3cecc6fc 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -52,7 +52,6 @@ module privileged ( input logic IClassWrongM, // branch predictor guessed wrong instruction class input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class - input logic JumpOrTakenBranchM, // actual instruction class input logic DCacheMiss, // data cache miss input logic DCacheAccess, // data cache accessed (hit or miss) input logic ICacheMiss, // instruction cache miss @@ -126,7 +125,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .JumpOrTakenBranchM, + .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 8f888593..be85420f 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -160,7 +160,6 @@ module wallypipelinedcore ( logic BigEndianM; logic FCvtIntE; logic CommittedF; - logic JumpOrTakenBranchM; logic BranchD, BranchE, JumpD, JumpE; // instruction fetch unit: PC, branch prediction, instruction cache @@ -176,7 +175,7 @@ module wallypipelinedcore ( .PCLinkE, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCE, .BPWrongE, .BPWrongM, // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, - .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .JumpOrTakenBranchM, + .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, // Faults out .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, @@ -291,7 +290,7 @@ module wallypipelinedcore ( .FRegWriteM, .LoadStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, .RASPredPCWrongM, .IClassWrongM, - .InstrClassM, .JumpOrTakenBranchM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, + .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, .InstrMisalignedFaultM, .IllegalIEUFPUInstrD, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, From 1169567219e64e0c09d97fe75976691100901737 Mon Sep 17 00:00:00 2001 From: eroom1966 Date: Thu, 2 Mar 2023 15:25:27 +0000 Subject: [PATCH 02/15] fix the memory map privileges in the REF model view --- sim/imperas.ic | 1 + testbench/testbench_imperas.sv | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/sim/imperas.ic b/sim/imperas.ic index 69d57f70..5e3357ea 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -5,6 +5,7 @@ --override cpu/show_c_prefix=T --override cpu/unaligned=F --override cpu/ignore_non_leaf_DAU=1 +--override cpu/wfi_is_nop=T # Enable the Imperas instruction coverage #-extlib refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0 diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index cbca4927..442edec2 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -149,10 +149,12 @@ module testbench; if (!rvviVersionCheck(RVVI_API_VERSION)) begin msgfatal($sformatf("%m @ t=%0t: Expecting RVVI API version %0d.", $time, RVVI_API_VERSION)); end - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org")); - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv")); - void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GC")); - void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 39)); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org")); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv")); + void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GC")); + void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 39)); + void'(rvviRefConfigSetInt(IDV_CONFIG_MAX_NET_LATENCY_RETIREMENTS, 6)); + if (!rvviRefInit(elffilename)) begin msgfatal($sformatf("%m @ t=%0t: rvviRefInit failed", $time)); end @@ -166,8 +168,8 @@ module testbench; // cannot predict this register due to latency between // pending and taken - void'(rvviRefCsrSetVolatile(0, 32'h344)); - rvviRefCsrCompareEnable(0, 32'h344, RVVI_FALSE); + void'(rvviRefCsrSetVolatile(0, 32'h344)); // MIP + void'(rvviRefCsrSetVolatile(0, 32'h144)); // SIP // Memory lo, hi, priv (RVVI_MEMORY_PRIVILEGE_{READ,WRITE,EXEC}) void'(rvviRefMemorySetPrivilege(56'h0, 56'h7fffffffff, 0)); @@ -187,8 +189,8 @@ module testbench; void'(rvviRefMemorySetVolatile(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE))); end if (`UART_SUPPORTED) begin - void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE))); - void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW)); + void'(rvviRefMemorySetPrivilege(`UART_BASE, (`UART_BASE + `UART_RANGE), PRIV_RW)); + void'(rvviRefMemorySetVolatile(`UART_BASE, (`UART_BASE + `UART_RANGE))); end if (`PLIC_SUPPORTED) begin void'(rvviRefMemorySetPrivilege(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE), PRIV_RW)); @@ -206,6 +208,8 @@ module testbench; void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH end + void'(rvviRefCsrSetVolatile(0, 32'h104)); // SIE - Temporary!!!! + // These should be done in the attached client // // Enable the trace2log module // if ($value$plusargs("TRACE2LOG_ENABLE=%d", TRACE2LOG_ENABLE)) begin @@ -217,6 +221,11 @@ module testbench; // end end + always @(dut.core.MTimerInt) void'(rvvi.net_push("MTimerInterrupt", dut.core.MTimerInt)); + always @(dut.core.MExtInt) void'(rvvi.net_push("MExternalInterrupt", dut.core.MExtInt)); + always @(dut.core.SExtInt) void'(rvvi.net_push("SExternalInterrupt", dut.core.SExtInt)); + always @(dut.core.MSwInt) void'(rvvi.net_push("MSWInterrupt", dut.core.MSwInt)); + final begin void'(rvviRefShutdown()); end From 9bac643db2cf6c2a4a5e7b78aa403008082ecb9f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 22:16:30 -0600 Subject: [PATCH 03/15] Added support for branch target buffer stats. --- bin/parseHPMC.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 7b695d34..2da17277 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -156,7 +156,7 @@ def GeometricAverage(benchmarks, field): return Product ** (1.0/index) def ComputeGeometricAverage(benchmarks): - fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR'] + fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR', 'CPI'] AllAve = {} for field in fields: Product = 1 @@ -205,9 +205,9 @@ if(sys.argv[1] == '-b'): for benchmark in benchmarkAll: (name, opt, config, dataDict) = benchmark if name+'_'+opt in benchmarkDict: - benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) + benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR'])) else: - benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] + benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])] size = len(benchmarkDict) index = 1 @@ -272,7 +272,9 @@ if(sys.argv[1] == '-b'): else: # steps 1 and 2 benchmarks = ProcessFile(sys.argv[1]) - ComputeAverage(benchmarks) + print(benchmarks[0]) + ComputeAll(benchmarks) + ComputeGeometricAverage(benchmarks) # 3 process into useful data # cache hit rates # cache fill time @@ -280,7 +282,6 @@ else: # hazard counts # CPI # instruction distribution - ComputeAll(benchmarks) for benchmark in benchmarks: printStats(benchmark) From 983e30dcb1cb3fe58c28f64f8acecfacf8782f8a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 22:32:13 -0600 Subject: [PATCH 04/15] Fixed bug in performance counter script. --- bin/parseHPMC.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 2da17277..5b131237 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -205,9 +205,9 @@ if(sys.argv[1] == '-b'): for benchmark in benchmarkAll: (name, opt, config, dataDict) = benchmark if name+'_'+opt in benchmarkDict: - benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR'])) + benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) else: - benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])] + benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] size = len(benchmarkDict) index = 1 @@ -248,11 +248,11 @@ if(sys.argv[1] == '-b'): dct[PredType] = (currSize, currPercent) print(dct) fig, axes = plt.subplots() - marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x'} - colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue'} + marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x'} + colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue'} for cat in dct: (x, y) = dct[cat] - x=[int(2**int(v)/4) for v in x] + x=[int(2**int(v)) for v in x] print(x, y) axes.plot(x,y, color=colors[cat]) axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat]) @@ -262,9 +262,9 @@ if(sys.argv[1] == '-b'): axes.legend(loc='upper left') axes.set_xscale("log") axes.set_ylabel('Prediction Accuracy') - axes.set_xlabel('Size (bytes)') - axes.set_xticks([16, 64, 256, 1024, 4096, 16384]) - axes.set_xticklabels([16, 64, 256, 1024, 4096, 16384]) + axes.set_xlabel('Entries') + axes.set_xticks([64, 256, 1024, 4096, 16384, 65536]) + axes.set_xticklabels([64, 256, 1024, 4096, 16384, 65536]) axes.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5) plt.show() From e257ec96ac733b8aa580ab3f2141dad4d15cef53 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:04:31 -0600 Subject: [PATCH 05/15] Reordered performance counters and added space for new ones. --- src/privileged/csrc.sv | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 6ace0de3..2ea0a054 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -84,20 +84,29 @@ module csrc #(parameter if(`QEMU) begin: cevent // No other performance counters in QEMU assign CounterEvent[`COUNTERS-1:3] = 0; end else begin: cevent // User-defined counters - assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[4] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction - assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction - assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target - assign CounterEvent[7] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions - assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address - assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions + assign CounterEvent[3] = InstrClassM[0] & InstrValidNotFlushedM; // branch instruction + assign CounterEvent[4] = InstrClassM[1] & ~InstrClassM[2] & InstrValidNotFlushedM; // jump and not return instructions + assign CounterEvent[5] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions + assign CounterEvent[6] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong + assign CounterEvent[7] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction + assign CounterEvent[8] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target + assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong - assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; // data cache access - assign CounterEvent[12] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[13] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access - assign CounterEvent[14] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong - assign CounterEvent[`COUNTERS-1:16] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions + assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. + assign CounterEvent[12] = '0 & InstrValidNotFlushedM; // /// ********** store + assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access + assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss + assign CounterEvent[15] = '0; // //// ******* d cache miss cycles + assign CounterEvent[16] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access + assign CounterEvent[17] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss + assign CounterEvent[18] = '0; // //// ******** i cache miss cycles + assign CounterEvent[19] = '0; // ******** CSR writes + assign CounterEvent[20] = '0; // ******** fence.i + assign CounterEvent[21] = '0; // ******** sfence.vma + assign CounterEvent[22] = '0; // ******** # interrupts + assign CounterEvent[23] = '0; // ******** # exceptions + assign CounterEvent[24] = '0; // ******** # division cycles + assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end // Counter update and write logic From cf4d8e6bd0904c8a97539b449eb03a39141b2ef1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:10:54 -0600 Subject: [PATCH 06/15] Added store stall to performance counters. --- src/privileged/csr.sv | 3 ++- src/privileged/csrc.sv | 13 +++++++------ src/privileged/privileged.sv | 13 +++++++------ src/wally/wallypipelinedcore.sv | 2 +- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 58d00d0a..50951bd0 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -57,6 +57,7 @@ module csr #(parameter input logic SelHPTW, // hardware page table walker active, so base endianness on supervisor mode // inputs for performance counters input logic LoadStallD, + input logic StoreStallD, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -257,7 +258,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, - .InstrValidNotFlushedM, .LoadStallD, .CSRMWriteM, + .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 2ea0a054..7a166eff 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -43,7 +43,7 @@ module csrc #(parameter input logic clk, reset, input logic StallE, StallM, input logic FlushM, - input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, + input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, StoreStallD, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -55,7 +55,7 @@ module csrc #(parameter input logic ICacheMiss, input logic ICacheAccess, input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, input logic [63:0] MTIME_CLINT, @@ -67,6 +67,7 @@ module csrc #(parameter logic [`XLEN-1:0] HPMCOUNTER_REGW[`COUNTERS-1:0]; logic [`XLEN-1:0] HPMCOUNTERH_REGW[`COUNTERS-1:0]; logic LoadStallE, LoadStallM; + logic StoreStallE, StoreStallM; logic [`COUNTERS-1:0] WriteHPMCOUNTERM; logic [`COUNTERS-1:0] CounterEvent; logic [63:0] HPMCOUNTERPlusM[`COUNTERS-1:0]; @@ -74,8 +75,8 @@ module csrc #(parameter genvar i; // Interface signals - flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); // don't flush the load stall during a load stall. - flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); + flopenrc #(2) LoadStallEReg(.clk, .reset, .clear(1'b0), .en(~StallE), .d({StoreStallD, LoadStallD}), .q({StoreStallE, LoadStallE})); // don't flush the load stall during a load stall. + flopenrc #(2) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d({StoreStallE, LoadStallE}), .q({StoreStallM, LoadStallM})); // Determine when to increment each counter assign CounterEvent[0] = 1'b1; // MCYCLE always increments @@ -92,8 +93,8 @@ module csrc #(parameter assign CounterEvent[8] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong - assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[12] = '0 & InstrValidNotFlushedM; // /// ********** store + assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. + assign CounterEvent[12] = StoreStallM & InstrValidNotFlushedM; // Store Stall assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss assign CounterEvent[15] = '0; // //// ******* d cache miss cycles diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 3cecc6fc..5f6c1775 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -46,11 +46,12 @@ module privileged ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic BPDirPredWrongM, // branch predictor guessed wrong directoin - input logic BTBPredPCWrongM, // branch predictor guessed wrong target - input logic RASPredPCWrongM, // return adddress stack guessed wrong target - input logic IClassWrongM, // branch predictor guessed wrong instruction class - input logic BPWrongM, // branch predictor is wrong + input logic StoreStallD, // load instruction is stalling + input logic BPDirPredWrongM, // branch predictor guessed wrong direction + input logic BTBPredPCWrongM, // branch predictor guessed wrong target + input logic RASPredPCWrongM, // return adddress stack guessed wrong target + input logic IClassWrongM, // branch predictor guessed wrong instruction class + input logic BPWrongM, // branch predictor is wrong input logic [3:0] InstrClassM, // actual instruction class input logic DCacheMiss, // data cache miss input logic DCacheAccess, // data cache accessed (hit or miss) @@ -123,7 +124,7 @@ module privileged ( .InstrM, .PCM, .SrcAM, .IEUAdrM, .PC2NextF, .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, - .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, + .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index be85420f..6acd9692 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -287,7 +287,7 @@ module wallypipelinedcore ( .InstrM, .CSRReadValW, .UnalignedPCNextF, .RetM, .TrapM, .sfencevmaM, .InstrValidM, .CommittedM, .CommittedF, - .FRegWriteM, .LoadStallD, + .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, .RASPredPCWrongM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, From 3dbfa96aefcaddcad9245b02e3dc6af188119514 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:21:29 -0600 Subject: [PATCH 07/15] Added csr write counter, sfence vma counter, interrupt counter, and exception counter. --- src/privileged/csr.sv | 7 +++++-- src/privileged/csrc.sv | 16 ++++++++++------ src/privileged/privileged.sv | 9 +++++---- src/privileged/trap.sv | 2 +- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 50951bd0..2630e0f7 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -44,6 +44,7 @@ module csr #(parameter input logic mretM, sretM, wfiM, // return or WFI instruction input logic IntPendingM, // at least one interrupt is pending and could occur if enabled input logic InterruptM, // interrupt is occurring + input logic ExceptionM, // interrupt is occurring input logic MTimerInt, // timer interrupt input logic MExtInt, SExtInt, // external interrupt (from PLIC) input logic MSwInt, // software interrupt @@ -68,6 +69,7 @@ module csr #(parameter input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic sfencevmaM, // outputs from CSRs output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, @@ -258,9 +260,10 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, - .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRMWriteM, + .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, - .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, + .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, + .InterruptM, .ExceptionM, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 7a166eff..131be4ec 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -43,7 +43,8 @@ module csrc #(parameter input logic clk, reset, input logic StallE, StallM, input logic FlushM, - input logic InstrValidNotFlushedM, LoadStallD, CSRMWriteM, StoreStallD, + input logic InstrValidNotFlushedM, LoadStallD, StoreStallD, + input logic CSRMWriteM, CSRWriteM, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -54,6 +55,9 @@ module csrc #(parameter input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic sfencevmaM, + input logic InterruptM, + input logic ExceptionM, input logic [11:0] CSRAdrM, input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, @@ -100,12 +104,12 @@ module csrc #(parameter assign CounterEvent[15] = '0; // //// ******* d cache miss cycles assign CounterEvent[16] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access assign CounterEvent[17] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[18] = '0; // //// ******** i cache miss cycles - assign CounterEvent[19] = '0; // ******** CSR writes + assign CounterEvent[18] = '0; // //// ******** i cache miss cycles + assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes assign CounterEvent[20] = '0; // ******** fence.i - assign CounterEvent[21] = '0; // ******** sfence.vma - assign CounterEvent[22] = '0; // ******** # interrupts - assign CounterEvent[23] = '0; // ******** # exceptions + assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma + assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low + assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be low assign CounterEvent[24] = '0; // ******** # division cycles assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 5f6c1775..679d13bd 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -106,9 +106,9 @@ module privileged ( logic DelegateM; // trap should be delegated logic wfiM; // wait for interrupt instruction logic IntPendingM; // interrupt is pending, even if not enabled. ends wfi - logic InterruptM; // interrupt occuring - - + logic InterruptM; // interrupt occuring + logic ExceptionM; // Memory stage instruction caused a fault + // track the current privilege level privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW); @@ -126,6 +126,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, + .sfencevmaM, .ExceptionM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, @@ -149,7 +150,7 @@ module privileged ( .mretM, .sretM, .PrivilegeModeW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW, .STATUS_MIE, .STATUS_SIE, .InstrValidM, .CommittedM, .CommittedF, - .TrapM, .RetM, .wfiM, .InterruptM, .IntPendingM, .DelegateM, .WFIStallM, .CauseM); + .TrapM, .RetM, .wfiM, .InterruptM, .ExceptionM, .IntPendingM, .DelegateM, .WFIStallM, .CauseM); endmodule diff --git a/src/privileged/trap.sv b/src/privileged/trap.sv index d8ad28f5..1d98763f 100644 --- a/src/privileged/trap.sv +++ b/src/privileged/trap.sv @@ -45,6 +45,7 @@ module trap ( output logic TrapM, // Trap is occurring output logic RetM, // Return instruction being executed output logic InterruptM, // Interrupt is occurring + output logic ExceptionM, // exception is occurring output logic IntPendingM, // Interrupt is pending, might occur if enabled output logic DelegateM, // Delegate trap to supervisor handler output logic WFIStallM, // Stall due to WFI instruction @@ -52,7 +53,6 @@ module trap ( ); logic MIntGlobalEnM, SIntGlobalEnM; // Global interupt enables - logic ExceptionM; // exception is occurring logic Committed; // LSU or IFU has committed to a bus operation that can't be interrupted logic BothInstrAccessFaultM; // instruction or HPTW ITLB fill caused an Instruction Access Fault logic [11:0] PendingIntsM, ValidIntsM, EnabledIntsM; // interrupts are pending, valid, or enabled From b19d51b6a292df6d3d5c4391b48e287f1a4c5dff Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:29:20 -0600 Subject: [PATCH 08/15] Added fence counter. --- src/ieu/controller.sv | 13 +++++++------ src/ieu/ieu.sv | 5 +++-- src/privileged/csr.sv | 3 ++- src/privileged/csrc.sv | 3 ++- src/privileged/privileged.sv | 3 ++- src/wally/wallypipelinedcore.sv | 5 +++-- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 51284704..8e89656a 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -38,7 +38,9 @@ module controller( output logic [2:0] ImmSrcD, // Type of immediate extension input logic IllegalIEUFPUInstrD, // Illegal IEU and FPU instruction output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers - // Execute stage control signals + output logic JumpD, // Jump instruction + output logic BranchD, // Branch instruction + // Execute stage control signals input logic StallE, FlushE, // Stall, flush Execute stage input logic [1:0] FlagsE, // Comparison flags ({eq, lt}) input logic FWriteIntE, // Write integer register, coming from FPU controller @@ -51,7 +53,8 @@ module controller( output logic IntDivE, // Integer divide output logic MDUE, // MDU (multiply/divide) operatio output logic W64E, // RV64 W-type operation - output logic JumpE, // jump instruction + output logic JumpE, // jump instruction + output logic BranchE, // Branch instruction output logic SCE, // Store Conditional instruction output logic BranchSignedE, // Branch comparison operands are signed (if it's a branch) // Memory stage control signals @@ -63,9 +66,7 @@ module controller( output logic RegWriteM, // Instruction writes a register (needed for Hazard unit) output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$ output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid - output logic BranchD, BranchE, - output logic JumpD, - + output logic FenceM, // Fence instruction output logic FWriteIntM, // FPU controller writes integer register file // Writeback stage control signals input logic StallW, FlushW, // Stall, flush Writeback stage @@ -109,7 +110,7 @@ module controller( logic IEURegWriteE; // Register write logic IllegalERegAdrD; // RV32E attempts to write upper 16 registers logic [1:0] AtomicE; // Atomic instruction - logic FenceD, FenceE, FenceM; // Fence instruction + logic FenceD, FenceE; // Fence instruction logic SFenceVmaD; // sfence.vma instruction logic IntDivM; // Integer divide instruction diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 346594eb..85423374 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -71,7 +71,8 @@ module ieu ( output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit output logic MDUStallD, CSRRdStallD, StoreStallD, output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction - output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions + output logic CSRWriteFenceM, // CSR write or fence instruction needs to flush subsequent instructions + output logic FenceM ); logic [2:0] ImmSrcD; // Select type of immediate extension @@ -99,7 +100,7 @@ module ieu ( .Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM, - .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD); + .StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .FenceM, .StoreStallD); datapath dp( .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 2630e0f7..ce9acb6e 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -70,6 +70,7 @@ module csr #(parameter input logic ICacheMiss, input logic ICacheAccess, input logic sfencevmaM, + input logic FenceM, // outputs from CSRs output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, @@ -263,7 +264,7 @@ module csr #(parameter .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, - .InterruptM, .ExceptionM, + .InterruptM, .ExceptionM, .FenceM, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 131be4ec..85ce7f6f 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -58,6 +58,7 @@ module csrc #(parameter input logic sfencevmaM, input logic InterruptM, input logic ExceptionM, + input logic FenceM, input logic [11:0] CSRAdrM, input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, @@ -106,7 +107,7 @@ module csrc #(parameter assign CounterEvent[17] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss assign CounterEvent[18] = '0; // //// ******** i cache miss cycles assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes - assign CounterEvent[20] = '0; // ******** fence.i + assign CounterEvent[20] = FenceM & InstrValidNotFlushedM; // fence.i assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be low diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 679d13bd..14e7ce1d 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -84,6 +84,7 @@ module privileged ( // control outputs output logic RetM, TrapM, // return instruction, or trap output logic sfencevmaM, // sfence.vma instruction + input logic FenceM, // fence instruction output logic BigEndianM, // Use big endian in current privilege mode // Fault outputs output logic BreakpointFaultM, EcallFaultM, // breakpoint and Ecall traps should retire @@ -126,7 +127,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .sfencevmaM, .ExceptionM, + .sfencevmaM, .ExceptionM, .FenceM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 6acd9692..53ea756c 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -161,6 +161,7 @@ module wallypipelinedcore ( logic FCvtIntE; logic CommittedF; logic BranchD, BranchE, JumpD, JumpE; + logic FenceM; // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, .reset, @@ -207,7 +208,7 @@ module wallypipelinedcore ( // hazards .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .FCvtIntStallD, .LoadStallD, .MDUStallD, .CSRRdStallD, .PCSrcE, - .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .StoreStallD); + .CSRReadM, .CSRWriteM, .PrivilegedM, .CSRWriteFenceM, .FenceM, .StoreStallD); lsu lsu( .clk, .reset, .StallM, .FlushM, .StallW, .FlushW, @@ -285,7 +286,7 @@ module wallypipelinedcore ( .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, .InstrM, .CSRReadValW, .UnalignedPCNextF, - .RetM, .TrapM, .sfencevmaM, + .RetM, .TrapM, .sfencevmaM, .FenceM, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, From 4b501f6e030704f8ec414d9439f5fb085a598b9a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:54:56 -0600 Subject: [PATCH 09/15] Added the i and d cache cycle counters. --- src/ifu/ifu.sv | 6 +++--- src/lsu/lsu.sv | 2 +- src/privileged/csr.sv | 4 +++- src/privileged/csrc.sv | 6 ++++-- src/privileged/privileged.sv | 6 ++++-- src/wally/wallypipelinedcore.sv | 6 ++++-- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 78cd8c07..2f403ff5 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -65,10 +65,11 @@ module ifu ( output logic [`XLEN-1:0] PCM, // Memory stage instruction address // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br - output logic BPDirPredWrongM, // Prediction direction is wrong + output logic BPDirPredWrongM, // Prediction direction is wrong output logic BTBPredPCWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong - output logic IClassWrongM, // Class prediction is wrong + output logic IClassWrongM, // Class prediction is wrong + output logic ICacheStallF, // I$ busy with multicycle operation // Faults input logic IllegalBaseInstrD, // Illegal non-compressed instruction input logic IllegalFPUInstrD, // Illegal FP instruction @@ -127,7 +128,6 @@ module ifu ( logic CacheableF; // PMA indicates instruction address is cacheable logic SelNextSpillF; // In a spill, stall pipeline and gate local stallF logic BusStall; // Bus interface busy with multicycle operation - logic ICacheStallF; // I$ busy with multicycle operation logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill // branch predictor signal diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 18383e0d..9f11f700 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -54,6 +54,7 @@ module lsu ( input logic [1:0] PrivilegeModeW, // Current privilege mode input logic BigEndianM, // Swap byte order to big endian input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries + output logic DCacheStallM, // D$ busy with multicycle operation // fpu input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU input logic FpLoadStoreM, // Selects FPU as store for write data @@ -103,7 +104,6 @@ module lsu ( logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1 - logic DCacheStallM; // D$ busy with multicycle operation logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index ce9acb6e..306463a6 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -59,6 +59,8 @@ module csr #(parameter // inputs for performance counters input logic LoadStallD, input logic StoreStallD, + input logic ICacheStallF, + input logic DCacheStallM, input logic BPDirPredWrongM, input logic BTBPredPCWrongM, input logic RASPredPCWrongM, @@ -264,7 +266,7 @@ module csr #(parameter .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, - .InterruptM, .ExceptionM, .FenceM, + .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index 85ce7f6f..bc2c2b96 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -55,6 +55,8 @@ module csrc #(parameter input logic DCacheAccess, input logic ICacheMiss, input logic ICacheAccess, + input logic ICacheStallF, + input logic DCacheStallM, input logic sfencevmaM, input logic InterruptM, input logic ExceptionM, @@ -102,10 +104,10 @@ module csrc #(parameter assign CounterEvent[12] = StoreStallM & InstrValidNotFlushedM; // Store Stall assign CounterEvent[13] = DCacheAccess & InstrValidNotFlushedM; // data cache access assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = '0; // //// ******* d cache miss cycles + assign CounterEvent[15] = DCacheStallM; // d cache miss cycles assign CounterEvent[16] = ICacheAccess & InstrValidNotFlushedM; // instruction cache access assign CounterEvent[17] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[18] = '0; // //// ******** i cache miss cycles + assign CounterEvent[18] = ICacheStallF; // i cache miss cycles assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes assign CounterEvent[20] = FenceM & InstrValidNotFlushedM; // fence.i assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 14e7ce1d..0a85d52d 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -46,7 +46,9 @@ module privileged ( // processor events for performance counter logging input logic FRegWriteM, // instruction will write floating-point registers input logic LoadStallD, // load instruction is stalling - input logic StoreStallD, // load instruction is stalling + input logic StoreStallD, // store instruction is stalling + input logic ICacheStallF, // I cache stalled + input logic DCacheStallM, // D cache stalled input logic BPDirPredWrongM, // branch predictor guessed wrong direction input logic BTBPredPCWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target @@ -127,7 +129,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .sfencevmaM, .ExceptionM, .FenceM, + .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 53ea756c..567d10e7 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -162,12 +162,13 @@ module wallypipelinedcore ( logic CommittedF; logic BranchD, BranchE, JumpD, JumpE; logic FenceM; + logic DCacheStallM, ICacheStallF; // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .InstrValidM, .InstrValidE, .InstrValidD, - .BranchD, .BranchE, .JumpD, .JumpE, + .BranchD, .BranchE, .JumpD, .JumpE, .ICacheStallF, // Fetch .HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, @@ -231,6 +232,7 @@ module wallypipelinedcore ( .STATUS_MPRV, // from csr .STATUS_MPP, // from csr .sfencevmaM, // connects to privilege + .DCacheStallM, // connects to privilege .LoadPageFaultM, // connects to privilege .StoreAmoPageFaultM, // connects to privilege .LoadMisalignedFaultM, // connects to privilege @@ -286,7 +288,7 @@ module wallypipelinedcore ( .FlushD, .FlushE, .FlushM, .FlushW, .StallD, .StallE, .StallM, .StallW, .CSRReadM, .CSRWriteM, .SrcAM, .PCM, .PC2NextF, .InstrM, .CSRReadValW, .UnalignedPCNextF, - .RetM, .TrapM, .sfencevmaM, .FenceM, + .RetM, .TrapM, .sfencevmaM, .FenceM, .DCacheStallM, .ICacheStallF, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, From bdab2c8506c0fe697424f94ff3d6f91e6e4cd066 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 2 Mar 2023 23:59:52 -0600 Subject: [PATCH 10/15] Added divide cycle counter. --- src/privileged/csr.sv | 4 +++- src/privileged/csrc.sv | 4 +++- src/privileged/privileged.sv | 4 +++- src/wally/wallypipelinedcore.sv | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 306463a6..5fc5d18e 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -73,6 +73,8 @@ module csr #(parameter input logic ICacheAccess, input logic sfencevmaM, input logic FenceM, + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy // outputs from CSRs output logic [1:0] STATUS_MPP, output logic STATUS_SPP, STATUS_TSR, STATUS_TVM, @@ -266,7 +268,7 @@ module csr #(parameter .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, - .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, + .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, .MCOUNTINHIBIT_REGW, .MCOUNTEREN_REGW, .SCOUNTEREN_REGW, .MTIME_CLINT, .CSRCReadValM, .IllegalCSRCAccessM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index bc2c2b96..feab9404 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -61,6 +61,8 @@ module csrc #(parameter input logic InterruptM, input logic ExceptionM, input logic FenceM, + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy input logic [11:0] CSRAdrM, input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, @@ -113,7 +115,7 @@ module csrc #(parameter assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma assign CounterEvent[22] = InterruptM; // interrupt, InstrValidNotFlushedM will be low assign CounterEvent[23] = ExceptionM; // exceptions, InstrValidNotFlushedM will be low - assign CounterEvent[24] = '0; // ******** # division cycles + assign CounterEvent[24] = DivBusyE | FDivBusyE; // division cycles *** RT: might need to be delay until the next cycle assign CounterEvent[`COUNTERS-1:25] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 0a85d52d..fd4bb0f8 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -59,6 +59,8 @@ module privileged ( input logic DCacheAccess, // data cache accessed (hit or miss) input logic ICacheMiss, // instruction cache miss input logic ICacheAccess, // instruction cache access + input logic DivBusyE, // integer divide busy + input logic FDivBusyE, // floating point divide busy // fault sources input logic InstrAccessFaultF, // instruction access fault input logic LoadAccessFaultM, StoreAmoAccessFaultM, // load or store access fault @@ -129,7 +131,7 @@ module privileged ( .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, - .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, + .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TVM, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 567d10e7..4e68ce48 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -292,7 +292,7 @@ module wallypipelinedcore ( .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, - .RASPredPCWrongM, .IClassWrongM, + .RASPredPCWrongM, .IClassWrongM, .DivBusyE, .FDivBusyE, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, .InstrMisalignedFaultM, .IllegalIEUFPUInstrD, From 7dd8fa16c1d2a8229c1dea210077e169f9054ace Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 00:18:34 -0600 Subject: [PATCH 11/15] Renamed BTB misprediction to BTA. --- src/ifu/bpred/bpred.sv | 6 +++--- src/ifu/ifu.sv | 6 +++--- src/privileged/csr.sv | 4 ++-- src/privileged/csrc.sv | 4 ++-- src/privileged/privileged.sv | 4 ++-- src/wally/wallypipelinedcore.sv | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 626da896..f2f16b51 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -64,7 +64,7 @@ module bpred ( output logic BPWrongE, // Prediction is wrong output logic BPWrongM, // Prediction is wrong output logic BPDirPredWrongM, // Prediction direction is wrong - output logic BTBPredPCWrongM, // Prediction target wrong + output logic BTAWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong output logic IClassWrongM // Class prediction is wrong ); @@ -215,10 +215,10 @@ module bpred ( flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, - {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); + {BPDirPredWrongM, BTAWrongM, RASPredPCWrongM}); end else begin - assign {BTBPredPCWrongM, RASPredPCWrongM} = '0; + assign {BTAWrongM, RASPredPCWrongM} = '0; end // **** Fix me diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 2f403ff5..2c2ee7b4 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -66,7 +66,7 @@ module ifu ( // branch predictor output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br output logic BPDirPredWrongM, // Prediction direction is wrong - output logic BTBPredPCWrongM, // Prediction target wrong + output logic BTAWrongM, // Prediction target wrong output logic RASPredPCWrongM, // RAS prediction is wrong output logic IClassWrongM, // Class prediction is wrong output logic ICacheStallF, // I$ busy with multicycle operation @@ -331,12 +331,12 @@ module ifu ( .BranchD, .BranchE, .JumpD, .JumpE, .InstrD, .PCNextF, .PCPlus2or4F, .PC1NextF, .PCE, .PCM, .PCSrcE, .IEUAdrE, .IEUAdrM, .PCF, .NextValidPCE, .PCD, .PCLinkE, .InstrClassM, .BPWrongE, .PostSpillInstrRawF, .BPWrongM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM); + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM); end else begin : bpred mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); assign BPWrongE = PCSrcE; - assign {InstrClassM, BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM, IClassWrongM} = '0; + assign {InstrClassM, BPDirPredWrongM, BTAWrongM, RASPredPCWrongM, IClassWrongM} = '0; assign NextValidPCE = PCE; end diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 5fc5d18e..d97be53f 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -62,7 +62,7 @@ module csr #(parameter input logic ICacheStallF, input logic DCacheStallM, input logic BPDirPredWrongM, - input logic BTBPredPCWrongM, + input logic BTAWrongM, input logic RASPredPCWrongM, input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong @@ -266,7 +266,7 @@ module csr #(parameter if (`ZICOUNTERS_SUPPORTED) begin:counters csrc counters(.clk, .reset, .StallE, .StallM, .FlushM, .InstrValidNotFlushedM, .LoadStallD, .StoreStallD, .CSRWriteM, .CSRMWriteM, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, .BPWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .sfencevmaM, .InterruptM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .CSRAdrM, .PrivilegeModeW, .CSRWriteValM, diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index feab9404..b4f89f18 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -46,7 +46,7 @@ module csrc #(parameter input logic InstrValidNotFlushedM, LoadStallD, StoreStallD, input logic CSRMWriteM, CSRWriteM, input logic BPDirPredWrongM, - input logic BTBPredPCWrongM, + input logic BTAWrongM, input logic RASPredPCWrongM, input logic IClassWrongM, input logic BPWrongM, // branch predictor is wrong @@ -99,7 +99,7 @@ module csrc #(parameter assign CounterEvent[5] = InstrClassM[2] & InstrValidNotFlushedM; // return instructions assign CounterEvent[6] = BPWrongM & InstrValidNotFlushedM; // branch predictor wrong assign CounterEvent[7] = BPDirPredWrongM & InstrValidNotFlushedM; // Branch predictor wrong direction - assign CounterEvent[8] = BTBPredPCWrongM & InstrValidNotFlushedM; // branch predictor wrong target + assign CounterEvent[8] = BTAWrongM & InstrValidNotFlushedM; // branch predictor wrong target assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong assign CounterEvent[11] = LoadStallM & InstrValidNotFlushedM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index fd4bb0f8..251dbb3d 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -50,7 +50,7 @@ module privileged ( input logic ICacheStallF, // I cache stalled input logic DCacheStallM, // D cache stalled input logic BPDirPredWrongM, // branch predictor guessed wrong direction - input logic BTBPredPCWrongM, // branch predictor guessed wrong target + input logic BTAWrongM, // branch predictor guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target input logic IClassWrongM, // branch predictor guessed wrong instruction class input logic BPWrongM, // branch predictor is wrong @@ -130,7 +130,7 @@ module privileged ( .CSRReadM, .CSRWriteM, .TrapM, .mretM, .sretM, .wfiM, .IntPendingM, .InterruptM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .InstrValidM, .FRegWriteM, .LoadStallD, .StoreStallD, - .BPDirPredWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPWrongM, + .BPDirPredWrongM, .BTAWrongM, .RASPredPCWrongM, .BPWrongM, .sfencevmaM, .ExceptionM, .FenceM, .ICacheStallF, .DCacheStallM, .DivBusyE, .FDivBusyE, .IClassWrongM, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .NextPrivilegeModeM, .PrivilegeModeW, .CauseM, .SelHPTW, diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 4e68ce48..6c2d5816 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -142,7 +142,7 @@ module wallypipelinedcore ( logic BPWrongE, BPWrongM; logic BPDirPredWrongM; - logic BTBPredPCWrongM; + logic BTAWrongM; logic RASPredPCWrongM; logic IClassWrongM; logic [3:0] InstrClassM; @@ -178,7 +178,7 @@ module wallypipelinedcore ( // Mem .CommittedF, .UnalignedPCNextF, .InvalidateICacheM, .CSRWriteFenceM, .InstrD, .InstrM, .PCM, .InstrClassM, .BPDirPredWrongM, - .BTBPredPCWrongM, .RASPredPCWrongM, .IClassWrongM, + .BTAWrongM, .RASPredPCWrongM, .IClassWrongM, // Faults out .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, // mmu management @@ -291,7 +291,7 @@ module wallypipelinedcore ( .RetM, .TrapM, .sfencevmaM, .FenceM, .DCacheStallM, .ICacheStallF, .InstrValidM, .CommittedM, .CommittedF, .FRegWriteM, .LoadStallD, .StoreStallD, - .BPDirPredWrongM, .BTBPredPCWrongM, .BPWrongM, + .BPDirPredWrongM, .BTAWrongM, .BPWrongM, .RASPredPCWrongM, .IClassWrongM, .DivBusyE, .FDivBusyE, .InstrClassM, .DCacheMiss, .DCacheAccess, .ICacheMiss, .ICacheAccess, .PrivilegedM, .InstrPageFaultF, .LoadPageFaultM, .StoreAmoPageFaultM, From f6e97cf5168cd4b4596b5750ac809ce24d94de84 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 10:42:52 -0600 Subject: [PATCH 12/15] Added performance new counter prints to testbench. --- testbench/testbench.sv | 261 ++++++++++++++++++++--------------------- 1 file changed, 125 insertions(+), 136 deletions(-) diff --git a/testbench/testbench.sv b/testbench/testbench.sv index e811ea4b..a37c101f 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -155,9 +155,9 @@ logic [3:0] dummy; logic HREADY; logic HSELEXT; - logic InitializingMemories; - integer ResetCount, ResetThreshold; - logic InReset; + logic InitializingMemories; + integer ResetCount, ResetThreshold; + logic InReset; // instantiate device to be tested assign GPIOPinsIn = 0; @@ -228,7 +228,7 @@ logic [3:0] dummy; // read test vectors into memory pathname = tvpaths[tests[0].atoi()]; /* if (tests[0] == `IMPERASTEST) - pathname = tvpaths[0]; + pathname = tvpaths[0]; else pathname = tvpaths[1]; */ if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"}; else memfilename = {pathname, tests[test], ".elf.memfile"}; @@ -268,9 +268,6 @@ logic [3:0] dummy; // if ($time % 100000 == 0) $display("Time is %0t", $time); end - logic [`XLEN-1:0] debugmemoryadr; -// assign debugmemoryadr = dut.uncore.uncore.ram.ram.memory.RAM[5140]; - // check results assign reset_ext = InReset; @@ -285,97 +282,97 @@ logic [3:0] dummy; ResetCount = 0; end end else begin - if (TEST == "coremark") - if (dut.core.priv.priv.EcallFaultM) begin - $display("Benchmark: coremark is done."); - $stop; - end - // Termination condition (i.e. we finished running current test) - if (DCacheFlushDone) begin - integer begin_signature_addr; - InReset = 1; - begin_signature_addr = ProgramAddrLabelArray["begin_signature"]; - if (!begin_signature_addr) - $display("begin_signature addr not found in %s", ProgramLabelMapFile); - testadr = ($unsigned(begin_signature_addr))/(`XLEN/8); - testadrNoBase = (begin_signature_addr - `UNCORE_RAM_BASE)/(`XLEN/8); - #600; // give time for instructions in pipeline to finish - if (TEST == "embench") begin - // Writes contents of begin_signature to .sim.output file - // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score - // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking - $display("Embench Benchmark: %s is done.", tests[test]); - if (riscofTest) outputfile = {pathname, tests[test], "/ref/ref.sim.output"}; - else outputfile = {pathname, tests[test], ".sim.output"}; - outputFilePointer = $fopen(outputfile); - i = 0; - while ($unsigned(i) < $unsigned(5'd5)) begin - $fdisplayh(outputFilePointer, DCacheFlushFSM.ShadowRAM[testadr+i]); - i = i + 1; - end - $fclose(outputFilePointer); - $display("Embench Benchmark: created output file: %s", outputfile); - end else begin - // for tests with no self checking mechanism, read .signature.output file and compare to check for errors - // clear signature to prevent contamination from previous tests - for(i=0; i Date: Fri, 3 Mar 2023 13:10:01 -0600 Subject: [PATCH 13/15] Setup the testbench to exclude the warmup from performance counter reports. --- bin/parseHPMC.py | 30 ++++++++++++----- sim/wave.do | 76 ++++++++++++++++++------------------------ testbench/testbench.sv | 45 +++++++++++++++++++------ 3 files changed, 89 insertions(+), 62 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 5b131237..1ce084fc 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -67,12 +67,24 @@ def ComputeICacheMissRate(benchmark): ICacheMR = 100.0 * int(dataDict['I Cache Miss']) / int(dataDict['I Cache Access']) dataDict['ICacheMR'] = ICacheMR +def ComputeICacheMissTime(benchmark): + 'Computes and inserts instruction class miss prediction rate.' + (nameString, opt, dataDict) = benchmark + ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / int(dataDict['I Cache Miss']) + dataDict['ICacheMT'] = ICacheMR + def ComputeDCacheMissRate(benchmark): 'Computes and inserts instruction class miss prediction rate.' (nameString, opt, dataDict) = benchmark DCacheMR = 100.0 * int(dataDict['D Cache Miss']) / int(dataDict['D Cache Access']) dataDict['DCacheMR'] = DCacheMR +def ComputeDCacheMissTime(benchmark): + 'Computes and inserts instruction class miss prediction rate.' + (nameString, opt, dataDict) = benchmark + ICacheMR = 100.0 * int(dataDict['D Cache Cycles']) / int(dataDict['D Cache Miss']) + dataDict['DCacheMT'] = ICacheMR + def ComputeAll(benchmarks): for benchmark in benchmarks: ComputeCPI(benchmark) @@ -81,23 +93,23 @@ def ComputeAll(benchmarks): ComputeRASMissRate(benchmark) ComputeInstrClassMissRate(benchmark) ComputeICacheMissRate(benchmark) + ComputeICacheMissTime(benchmark) ComputeDCacheMissRate(benchmark) + ComputeDCacheMissTime(benchmark) def printStats(benchmark): (nameString, opt, dataDict) = benchmark - CPI = dataDict['CPI'] - BDMR = dataDict['BDMR'] - BTMR = dataDict['BTMR'] - RASMPR = dataDict['RASMPR'] print('Test', nameString) print('Compile configuration', opt) - print('CPI \t\t\t %1.2f' % CPI) - print('Branch Dir Pred Miss Rate %2.2f' % BDMR) - print('Branch Target Pred Miss Rate %2.2f' % BTMR) - print('RAS Miss Rate \t\t %1.2f' % RASMPR) + print('CPI \t\t\t %1.2f' % dataDict['CPI']) + print('Branch Dir Pred Miss Rate %2.2f' % dataDict['BDMR']) + print('Branch Target Pred Miss Rate %2.2f' % dataDict['BTMR']) + print('RAS Miss Rate \t\t %1.2f' % dataDict['RASMPR']) print('Instr Class Miss Rate %1.2f' % dataDict['ClassMPR']) print('I Cache Miss Rate %1.4f' % dataDict['ICacheMR']) + print('I Cache Miss Ave Cycles %1.4f' % dataDict['ICacheMT']) print('D Cache Miss Rate %1.4f' % dataDict['DCacheMR']) + print('D Cache Miss Ave Cycles %1.4f' % dataDict['DCacheMT']) print() def ProcessFile(fileName): @@ -156,7 +168,7 @@ def GeometricAverage(benchmarks, field): return Product ** (1.0/index) def ComputeGeometricAverage(benchmarks): - fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR', 'CPI'] + fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR', 'CPI', 'ICacheMT', 'DCacheMT'] AllAve = {} for field in fields: Product = 1 diff --git a/sim/wave.do b/sim/wave.do index 23413957..d9e8869a 100644 --- a/sim/wave.do +++ b/sim/wave.do @@ -6,7 +6,6 @@ add wave -noupdate /testbench/reset add wave -noupdate /testbench/reset_ext add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/dut/core/SATP_REGW -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD @@ -56,11 +55,11 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D -add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE -add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE -add wave -noupdate -group {Execution Stage} /testbench/InstrEName -add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE -add wave -noupdate -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName +add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/PCE +add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/InstrE +add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName +add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE +add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName @@ -91,19 +90,10 @@ add wave -noupdate -group CSRs -group {user mode} /testbench/dut/core/priv/priv/ add wave -noupdate -group Bpred -group {branch update selection inputs} -divider {class check} add wave -noupdate -group Bpred -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF add wave -noupdate -group Bpred -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE -add wave -noupdate -group Bpred -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/PredictionPCWrongE -add wave -noupdate -group Bpred -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/InstrClassE -add wave -noupdate -group Bpred -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE -add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/BPPredPCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/SelBPPredF -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/PCNext0F -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNext1F -add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/BPPredWrongE add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1 add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2 @@ -457,7 +447,6 @@ add wave -noupdate -group {debug trace} -expand -group mem -color Yellow /testbe add wave -noupdate -group {debug trace} -expand -group mem /testbench/dut/core/PCM add wave -noupdate -group {debug trace} -expand -group mem -color Brown /testbench/dut/core/hzu/TrapM add wave -noupdate -group {debug trace} -expand -group wb /testbench/PCW -add wave -noupdate -group {pc selection} /testbench/dut/core/ifu/PCNext2F add wave -noupdate -group ifu /testbench/dut/core/ifu/InstrRawF add wave -noupdate -group ifu /testbench/dut/core/ifu/PostSpillInstrRawF add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUStallF @@ -465,7 +454,6 @@ add wave -noupdate -group ifu -group Spill /testbench/dut/core/ifu/Spill/spill/C add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/SpillF add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallD add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/ITLBMissF -add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/InstrDAPageFaultF add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/TakeSpillF add wave -noupdate -group ifu -group Spill /testbench/dut/core/ifu/SelNextSpillF add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HSIZE @@ -558,19 +546,29 @@ add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/c add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/core/ifu/immu/immu/tlb/tlb/tlbcam/camlines[19]/Query1} add wave -noupdate -expand -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]} add wave -noupdate -expand -group {Performance Counters} -label MINSTRET -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]} -add wave -noupdate -expand -group {Performance Counters} -label {LOAD STORE HAZARD} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {BP DIRECTION WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[4]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {BP INSTRUCTION} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[5]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {BTA/JTA WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[6]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {JAL(R) INSTRUCTION} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {RAS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[8]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {RETURN INSTRUCTION} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[9]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {BP CLASS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[10]} -add wave -noupdate -expand -group {Performance Counters} -expand -group BRP -label {Branch Predictor Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]} -add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {ICACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]} -add wave -noupdate -expand -group {Performance Counters} -expand -group ICACHE -label {ICACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]} -add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]} -add wave -noupdate -expand -group {Performance Counters} -expand -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Branch -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {Jump (Not Return)} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[4]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Return -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[5]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[6]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BTA Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[8]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[9]} +add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP CLASS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[10]} +add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Access} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]} +add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]} +add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss Cycles} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]} +add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]} +add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]} +add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]} +add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]} +add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]} +add wave -noupdate -expand -group {Performance Counters} -group Privileged -label {CSR Write} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[19]} +add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Fence.I {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[20]} +add wave -noupdate -expand -group {Performance Counters} -group Privileged -label sfence.VMA {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[21]} +add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Interrupt {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[22]} +add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Exception {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[23]} +add wave -noupdate -expand -group {Performance Counters} -label {FDiv or IDiv Cycles} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[24]} +add wave -noupdate -expand -group {Performance Counters} /testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW add wave -noupdate -group {ifu } -color Gold /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/CurrState add wave -noupdate -group {ifu } /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/HREADY add wave -noupdate -group {ifu } /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/FetchBuffer @@ -604,10 +602,6 @@ add wave -noupdate -group uncore /testbench/dut/uncore/uncore/HRDATA add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/rd add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF add wave -noupdate -group {branch direction} -expand -group {branch outcome} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCSrcE -add wave -noupdate -group {branch direction} -expand -group {branch outcome} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/DirPredictionE -add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/TableDirPredictionF -add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/MatchXF -add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/DirPredictionWrongE add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM add wave -noupdate -group {branch direction} -expand -group conditions /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushE add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF @@ -615,27 +609,23 @@ add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/c add wave -noupdate -group {branch direction} -expand -group ghr /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRE add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushD add wave -noupdate -group {branch direction} -expand -group nextghr2 /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF -add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/NewDirPredictionE add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexE add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/StallM add wave -noupdate -group {branch direction} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/FlushM -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRF add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNextF add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/IndexNextF -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/DirPredictionF -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BranchInstrF -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/MatchNextX -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/PredInstrClassF -add wave -noupdate /testbench/dut/core/ifu/bpred/bpred/PredValidF add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheAccess add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheMiss add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheAccess add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMiss add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM +add wave -noupdate /testbench/clk +add wave -noupdate /testbench/HPMCSample/FinalHPMCOUNTERH +add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {368581 ns} 0} {{Cursor 5} {394987 ns} 1} +WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {717301 ns} 0} {{Cursor 5} {394987 ns} 1} quietly wave cursor active 4 configure wave -namecolwidth 250 configure wave -valuecolwidth 194 @@ -651,4 +641,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {368125 ns} {368797 ns} +WaveRestoreZoom {717254 ns} {717585 ns} diff --git a/testbench/testbench.sv b/testbench/testbench.sv index a37c101f..984c8722 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -401,17 +401,24 @@ logic [3:0] dummy; end // always @ (negedge clk) - if(`PrintHPMCounters & `ZICOUNTERS_SUPPORTED) begin + if(`PrintHPMCounters & `ZICOUNTERS_SUPPORTED) begin : HPMCSample integer HPMCindex; + logic StartSampleFirst; + logic StartSampleDelayed; + logic StartSample; + logic EndSample; + logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0]; + logic [`XLEN-1:0] FinalHPMCOUNTERH[`COUNTERS-1:0]; + string HPMCnames[] = '{"Mcycle", "------", "InstRet", "Br Count", - "Jump, JR, Jal", + "Jump Not Return", "Return", - "Br Dir Wrong", - "Br Pred Wrong", - "Br Target Wrong", + "BP Wrong", + "BP Dir Wrong", + "BP Target Wrong", "RAS Wrong", "Instr Class Wrong", "Load Stall", @@ -429,15 +436,33 @@ logic [3:0] dummy; "Exception", "Divide Cycles" }; + assign StartSampleFirst = FunctionName.FunctionName.FunctionName == "start_trigger"; + flopr #(1) StartSampleReg(clk, reset, StartSampleFirst, StartSampleDelayed); + assign StartSample = StartSampleFirst & ~ StartSampleDelayed; + + assign EndSample = DCacheFlushStart & ~DCacheFlushDone; + always @(negedge clk) begin - if(DCacheFlushStart & ~DCacheFlushDone) begin + if(StartSample) begin + for(HPMCindex = 0; HPMCindex < 32; HPMCindex += 1) begin + InitialHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex]; + end + end + if(EndSample) begin + for(HPMCindex = 0; HPMCindex < 32; HPMCindex += 1) begin + FinalHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex]; + end + end + if(EndSample) begin for(HPMCindex = 0; HPMCindex < HPMCnames.size(); HPMCindex += 1) begin // unlikely to have more than 10M in any counter. - $display("Cnt[%2d] = %7d %s", HPMCindex, dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex], HPMCnames[HPMCindex]); - end - end - end + $display("Cnt[%2d] = %7d %s", HPMCindex, dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex] - InitialHPMCOUNTERH[HPMCindex], HPMCnames[HPMCindex]); + end + end + end end + + // track the current function or global label if (DEBUG == 1) begin : FunctionName From 2d0512936b3b1c6b2c2ce198e4ffb27006667aab Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 14:59:20 -0600 Subject: [PATCH 14/15] Fixed batch mode regression test to work with hpmc loggic. Added logic to exclude the embench warmups from preformance counters. --- sim/wally-batch.do | 4 ++-- testbench/testbench.sv | 30 +++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/sim/wally-batch.do b/sim/wally-batch.do index 3168b452..1992c180 100644 --- a/sim/wally-batch.do +++ b/sim/wally-batch.do @@ -81,11 +81,11 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G TEST=$2 -o testbenchopt - vsim -lib wkdir/work_${1}_${2} testbenchopt -fatal 7 + vsim -lib wkdir/work_${1}_${2} testbenchopt -fatal 7 -suppress 3829 # Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time #vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf #vsim -coverage -lib work_$2 workopt_$2 - + do wave.do # power add generates the logging necessary for said generation. # power add -r /dut/core/* run -all diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 984c8722..e6f025e8 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -406,7 +406,7 @@ logic [3:0] dummy; logic StartSampleFirst; logic StartSampleDelayed; logic StartSample; - logic EndSample; + logic EndSample, EndSampleFirst, EndSampleDelayed; logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0]; logic [`XLEN-1:0] FinalHPMCOUNTERH[`COUNTERS-1:0]; @@ -436,11 +436,27 @@ logic [3:0] dummy; "Exception", "Divide Cycles" }; - assign StartSampleFirst = FunctionName.FunctionName.FunctionName == "start_trigger"; - flopr #(1) StartSampleReg(clk, reset, StartSampleFirst, StartSampleDelayed); - assign StartSample = StartSampleFirst & ~ StartSampleDelayed; - - assign EndSample = DCacheFlushStart & ~DCacheFlushDone; + + if(TEST == "embench") begin + // embench runs warmup then runs start_trigger + // embench end with stop_trigger. + assign StartSampleFirst = FunctionName.FunctionName.FunctionName == "start_trigger"; + flopr #(1) StartSampleReg(clk, reset, StartSampleFirst, StartSampleDelayed); + assign StartSample = StartSampleFirst & ~ StartSampleDelayed; + + assign EndSampleFirst = FunctionName.FunctionName.FunctionName == "stop_trigger"; + flopr #(1) EndSampleReg(clk, reset, EndSampleFirst, EndSampleDelayed); + assign EndSample = EndSampleFirst & ~ EndSampleDelayed; + + end else begin + // default start condiction is reset + // default end condiction is end of test (DCacheFlushDone) + assign StartSampleFirst = InReset; + flopr #(1) StartSampleReg(clk, reset, StartSampleFirst, StartSampleDelayed); + assign StartSample = StartSampleFirst & ~ StartSampleDelayed; + + assign EndSample = DCacheFlushStart & ~DCacheFlushDone; + end always @(negedge clk) begin if(StartSample) begin @@ -465,7 +481,7 @@ logic [3:0] dummy; // track the current function or global label - if (DEBUG == 1) begin : FunctionName + if (DEBUG == 1 | (`PrintHPMCounters & `ZICOUNTERS_SUPPORTED)) begin : FunctionName FunctionName FunctionName(.reset(reset), .clk(clk), .ProgramAddrMapFile(ProgramAddrMapFile), From daaea6064d0571d41ecdcac6d35744b5e6e5eab1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 15:10:07 -0600 Subject: [PATCH 15/15] Oups included the wave file in the wally-batch.do script. --- sim/wally-batch.do | 1 - 1 file changed, 1 deletion(-) diff --git a/sim/wally-batch.do b/sim/wally-batch.do index 1992c180..7e63de8a 100644 --- a/sim/wally-batch.do +++ b/sim/wally-batch.do @@ -85,7 +85,6 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time #vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf #vsim -coverage -lib work_$2 workopt_$2 - do wave.do # power add generates the logging necessary for said generation. # power add -r /dut/core/* run -all