Merge pull request #415 from ross144/main

added new branch predictor post processing script and updated buildroot config script to support c++
2025-02-11 06:05:49 +00:00 · 2023-10-03 20:25:56 -07:00 · 2023-10-03 20:25:56 -07:00 · 0d0d5d8af0
commit 0d0d5d8af0
parent 93b12ff942 0ff34c0d06
21 changed files with 811 additions and 1487 deletions
--- a/bin/parseHPMC.py
+++ b/bin/parseHPMC.py
@ -1,8 +1,8 @@
 #!/usr/bin/python3

 ###########################################
-## Written: Ross Thompson ross1728@gmail.com
-## Created: 4 Jan 2022
+## Written: Rose Thompson ross1728@gmail.com
+## Created: 20 September 2023
 ## Modified: 
 ##
 ## Purpose: Parses the performance counters from a modelsim trace.
@ -28,110 +28,29 @@
 import os
 import sys
 import matplotlib.pyplot as plt
-import re
+import math
+import numpy as np
+import argparse

-#RefData={'twobitCModel' :(['6', '8', '10', '12', '14', '16'],
-#                          [11.0680836450622, 8.53864970807778, 7.59565430177984, 6.38741598498948, 5.83662961500838, 5.83662961500838]),
-#         'gshareCModel' : (['6', '8', '10', '12', '14', '16'],
-#                           [14.5859173702079, 12.3634674403619, 10.5806018170154, 8.38831266973592, 6.37097544620762, 3.52638362703015])
-#}
+RefData = [('twobitCModel6', 'twobitCModel', 64, 9.65280765420711), ('twobitCModel8', 'twobitCModel', 256, 8.75120245829945), ('twobitCModel10', 'twobitCModel', 1024, 8.1318382397263),
+           ('twobitCModel12', 'twobitCModel', 4096, 7.53026646633342), ('twobitCModel14', 'twobitCModel', 16384, 6.07679338544009), ('twobitCModel16', 'twobitCModel', 65536, 6.07679338544009),
+           ('gshareCModel6', 'gshareCModel', 64, 10.6602835418646), ('gshareCModel8', 'gshareCModel', 256, 8.38384710559667), ('gshareCModel10', 'gshareCModel', 1024, 6.36847432155534),
+           ('gshareCModel12', 'gshareCModel', 4096, 3.91108491151983), ('gshareCModel14', 'gshareCModel', 16384, 2.83926519215395), ('gshareCModel16', 'gshareCModel', 65536, .60213659066941)]

-#RefData = [('twobitCModel6', 11.0501534891674), ('twobitCModel8', 8.51829052266352), ('twobitCModel10', 7.56775222626483),
-#           ('twobitCModel12', 6.31366834586515), ('twobitCModel14', 5.72699936834177), ('twobitCModel16', 5.72699936834177),
-#           ('gshareCModel6', 14.5731555979574), ('gshareCModel8', 12.3155658100497), ('gshareCModel10', 10.4589596630561),
-#           ('gshareCModel12', 8.25796055444401), ('gshareCModel14', 6.23093702707613), ('gshareCModel16', 3.34001125650374)]
-
-RefData = [('twobitCModel6', 9.65280765420711), ('twobitCModel8', 8.75120245829945), ('twobitCModel10', 8.1318382397263),
-           ('twobitCModel12', 7.53026646633342), ('twobitCModel14', 6.07679338544009), ('twobitCModel16', 6.07679338544009),
-           ('gshareCModel6', 10.6602835418646), ('gshareCModel8', 8.38384710559667), ('gshareCModel10', 6.36847432155534),
-           ('gshareCModel12', 3.91108491151983), ('gshareCModel14', 2.83926519215395), ('gshareCModel16', .60213659066941)]
-
-
-def ComputeCPI(benchmark):
-    'Computes and inserts CPI into benchmark stats.'
-    (nameString, opt, dataDict) = benchmark
-    CPI = 1.0 * int(dataDict['Mcycle']) / int(dataDict['InstRet'])
-    dataDict['CPI'] = CPI
-
-def ComputeBranchDirMissRate(benchmark):
-    'Computes and inserts branch direction miss prediction rate.'
-    (nameString, opt, dataDict) = benchmark
-    branchDirMissRate = 100.0 * int(dataDict['BP Dir Wrong']) / int(dataDict['Br Count'])
-    dataDict['BDMR'] = branchDirMissRate
-
-def ComputeBranchTargetMissRate(benchmark):
-    'Computes and inserts branch target miss prediction rate.'
-    # *** this is wrong in the verilog test bench
-    (nameString, opt, dataDict) = benchmark
-    branchTargetMissRate = 100.0 * int(dataDict['BP Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump Not Return']))
-    dataDict['BTMR'] = branchTargetMissRate
-
-def ComputeRASMissRate(benchmark):
-    'Computes and inserts return address stack miss prediction rate.'
-    (nameString, opt, dataDict) = benchmark
-    RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['Return'])
-    dataDict['RASMPR'] = RASMPR
-
-def ComputeInstrClassMissRate(benchmark):
-    'Computes and inserts instruction class miss prediction rate.'
-    (nameString, opt, dataDict) = benchmark
-    ClassMPR = 100.0 * int(dataDict['Instr Class Wrong']) / int(dataDict['InstRet'])
-    dataDict['ClassMPR'] = ClassMPR
-    
-def ComputeICacheMissRate(benchmark):
-    'Computes and inserts instruction class miss prediction rate.'
-    (nameString, opt, dataDict) = benchmark
-    ICacheMR = 100.0 * int(dataDict['I Cache Miss']) / int(dataDict['I Cache Access'])
-    dataDict['ICacheMR'] = ICacheMR
-
-def ComputeICacheMissTime(benchmark):
-    'Computes and inserts instruction class miss prediction rate.'
-    (nameString, opt, dataDict) = benchmark
-    cycles = int(dataDict['I Cache Miss'])
-    if(cycles == 0): ICacheMR = 0
-    else: ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / cycles
-    dataDict['ICacheMT'] = ICacheMR
-    
-def ComputeDCacheMissRate(benchmark):
-    'Computes and inserts instruction class miss prediction rate.'
-    (nameString, opt, dataDict) = benchmark
-    DCacheMR = 100.0 * int(dataDict['D Cache Miss']) / int(dataDict['D Cache Access'])
-    dataDict['DCacheMR'] = DCacheMR
-
-def ComputeDCacheMissTime(benchmark):
-    'Computes and inserts instruction class miss prediction rate.'
-    (nameString, opt, dataDict) = benchmark
-    cycles = int(dataDict['D Cache Miss'])
-    if(cycles == 0): DCacheMR = 0
-    else: DCacheMR = 100.0 * int(dataDict['D Cache Cycles']) / cycles
-    dataDict['DCacheMT'] = DCacheMR
-
-def ComputeAll(benchmarks):
-    for benchmark in benchmarks:
-        ComputeCPI(benchmark)
-        ComputeBranchDirMissRate(benchmark)
-        ComputeBranchTargetMissRate(benchmark)
-        ComputeRASMissRate(benchmark)
-        ComputeInstrClassMissRate(benchmark)
-        ComputeICacheMissRate(benchmark)
-        ComputeICacheMissTime(benchmark)
-        ComputeDCacheMissRate(benchmark)
-        ComputeDCacheMissTime(benchmark)
-    
-def printStats(benchmark):
-    (nameString, opt, dataDict) = benchmark
-    print('Test', nameString)
-    print('Compile configuration', opt)
-    print('CPI \t\t\t  %1.2f' % dataDict['CPI'])
-    print('Branch Dir Pred Miss Rate %2.2f' % dataDict['BDMR'])
-    print('Branch Target Pred Miss Rate %2.2f' % dataDict['BTMR'])
-    print('RAS Miss Rate \t\t  %1.2f' % dataDict['RASMPR'])
-    print('Instr Class Miss Rate  %1.2f' % dataDict['ClassMPR'])
-    print('I Cache Miss Rate  %1.4f' % dataDict['ICacheMR'])
-    print('I Cache Miss Ave Cycles  %1.4f' % dataDict['ICacheMT'])
-    print('D Cache Miss Rate  %1.4f' % dataDict['DCacheMR'])
-    print('D Cache Miss Ave Cycles  %1.4f' % dataDict['DCacheMT'])
-    print()
+def ParseBranchListFile(path):
+    '''Read the list file that names the Questa Sim logs holding the performance counter outputs.
+
+    Each non-blank row is whitespace separated: the log file name (taken relative to the
+    directory of path), the branch predictor type, and then any number of predictor
+    parameters, which depend on the predictor type.  Returns a list of
+    [logPath, predictorType, predictorParams] lists where predictorParams is a list of strings.'''
+    lst = []
+    baseDir = os.path.dirname(path)
+    # the with block closes the list file even if a row fails to parse
+    with open(path, 'r') as BranchList:
+        for line in BranchList:
+            tokens = line.split()
+            if not tokens: continue    # a blank row used to raise IndexError on tokens[0]
+            predictorLog = baseDir + '/' + tokens[0]
+            predictorType = tokens[1]
+            predictorParams = tokens[2:]
+            lst.append([predictorLog, predictorType, predictorParams])
+    return lst
    
 def ProcessFile(fileName):
    '''Extract preformance counters from a modelsim log.  Outputs a list of tuples for each test/benchmark.
@ -150,43 +69,37 @@ def ProcessFile(fileName):
            HPMClist = { }
        elif(len(lineToken) > 4 and lineToken[1][0:3] == 'Cnt'):
            countToken = line.split('=')[1].split()
-            value = int(countToken[0])
+            value = int(countToken[0]) if countToken[0] != 'x' else 0
            name = ' '.join(countToken[1:])
            HPMClist[name] = value
        elif ('is done' in line):
            benchmarks.append((testName, opt, HPMClist))
    return benchmarks

-def ComputeArithmeticAverage(benchmarks):
-    average = {}
-    index = 0
-    for (testName, opt, HPMClist) in benchmarks:
-        for field in HPMClist:
-            value = HPMClist[field]
-            if field not in average:
-                average[field] = value
-            else:
-                average[field] += value
-        index += 1
-    benchmarks.append(('All', '', average))

-def FormatToPlot(currBenchmark):
-    names = []
-    values = []
-    for config in currBenchmark:
-        #print ('config' , config)
-        names.append(config[0])
-        values.append(config[1])
-    return (names, values)
+def ComputeStats(benchmarks):
+    '''Add the derived statistics to the counter dictionary of every benchmark, in place.
+
+    benchmarks is a list of (name, opt, dataDict) tuples.  Each dataDict gains CPI, the
+    percentages BDMR, BTMR, RASMPR, ClassMPR, ICacheMR and DCacheMR, and ICacheMT and DCacheMT
+    which are 100 * cache cycles / cache misses, or 0 when the cache had no misses.'''
+    for (_, _, counters) in benchmarks:
+        def count(field):
+            # raw counter converted to int
+            return int(counters[field])
+
+        counters['CPI'] = 1.0 * count('Mcycle') / count('InstRet')
+        counters['BDMR'] = 100.0 * count('BP Dir Wrong') / count('Br Count')
+        counters['BTMR'] = 100.0 * count('BP Target Wrong') / (count('Br Count') + count('Jump Not Return'))
+        counters['RASMPR'] = 100.0 * count('RAS Wrong') / count('Return')
+        counters['ClassMPR'] = 100.0 * count('Instr Class Wrong') / count('InstRet')
+        counters['ICacheMR'] = 100.0 * count('I Cache Miss') / count('I Cache Access')
+
+        # the cycle counter is only read when there is at least one miss
+        iMisses = count('I Cache Miss')
+        counters['ICacheMT'] = 100.0 * count('I Cache Cycles') / iMisses if iMisses != 0 else 0
+
+        counters['DCacheMR'] = 100.0 * count('D Cache Miss') / count('D Cache Access')
+
+        dMisses = count('D Cache Miss')
+        counters['DCacheMT'] = 100.0 * count('D Cache Cycles') / dMisses if dMisses != 0 else 0

-def GeometricAverage(benchmarks, field):
-    Product = 1
-    index = 0
-    for (testName, opt, HPMCList) in benchmarks:
-        #print(HPMCList)
-        Product *= HPMCList[field]
-        index += 1
-    return Product ** (1.0/index)

 def ComputeGeometricAverage(benchmarks):
    fields = ['BDMR', 'BTMR', 'RASMPR', 'ClassMPR', 'ICacheMR', 'DCacheMR', 'CPI', 'ICacheMT', 'DCacheMT']
@ -196,129 +109,362 @@ def ComputeGeometricAverage(benchmarks):
        index = 0
        for (testName, opt, HPMCList) in benchmarks:
            #print(HPMCList)
-            Product *= HPMCList[field]
+            value = HPMCList[field]
+            if(value != 0): Product *= value # if that value is 0 exclude from mean because it destories the geo mean
            index += 1
        AllAve[field] = Product ** (1.0/index)
-    benchmarks.append(('All', '', AllAve))
+    benchmarks.append(('Mean', '', AllAve))

-if(sys.argv[1] == '-b'):
-    configList = []
-    summery = 0
-    if(sys.argv[2] == '-s'):
-        summery = 1
-        sys.argv = sys.argv[1::]
-    for config in sys.argv[2::]:
-        benchmarks = ProcessFile(config)
-        #ComputeArithmeticAverage(benchmarks)
-        ComputeAll(benchmarks)
-        ComputeGeometricAverage(benchmarks)
-        #print('CONFIG: %s GEO MEAN: %f' % (config, GeometricAverage(benchmarks, 'BDMR')))
-        configList.append((config.split('.')[0], benchmarks))
+def GenerateName(predictorType, predictorParams):
+    '''Build the display name of a predictor configuration from its type and parameters.
+
+    predictorParams is a list of strings.  The single parameter predictors (gshare, twobit,
+    btb, class, ras) become type + parameter, for example gshare10.  The local predictor
+    uses two parameters and becomes local<param0>_<param1>.  Any other type prints an error
+    and exits with status -1.'''
+    if(predictorType in ('gshare', 'twobit', 'btb', 'class', 'ras')):
+        return predictorType + predictorParams[0]
+    # compare the type here: the parameter list can never equal 'local', so testing it
+    # sent every local predictor to the error exit below
+    elif(predictorType == 'local'):
+        return predictorType + predictorParams[0] + '_' + predictorParams[1]
+    else:
+        print(f'Error unsupported predictor type {predictorType}')
+        sys.exit(-1)

-    # Merge all configruations into a single list
-    benchmarkAll = []
-    for (config, benchmarks) in configList:
-        #print(config)
+def ComputePredNumEntries(predictorType, predictorParams):
+    '''Return the number of entries of a predictor configuration as an int.
+
+    predictorParams is a list of strings.  gshare, twobit, btb and class have 2**param0
+    entries, ras has param0 entries, and local has 2**param0 * param1 + 2**param1 entries.
+    Any other type prints an error and exits with status -1.'''
+    if(predictorType in ('gshare', 'twobit', 'btb', 'class')):
+        return 2**int(predictorParams[0])
+    elif(predictorType == 'ras'):
+        return int(predictorParams[0])
+    # compare the type here: the parameter list can never equal 'local', so testing it
+    # sent every local predictor to the error exit below
+    elif(predictorType == 'local'):
+        return 2**int(predictorParams[0]) * int(predictorParams[1]) + 2**int(predictorParams[1])
+    else:
+        print(f'Error unsupported predictor type {predictorType}')
+        sys.exit(-1)
+
+def BuildDataBase(predictorLogs):
+    '''Process every predictor log into one record per branch predictor configuration.
+
+    predictorLogs is a list of [logPath, predictorType, predictorParams] entries.  Each log
+    is parsed with ProcessFile, extended in place by ComputeStats, and given its 'Mean'
+    entry by ComputeGeometricAverage.  Returns a list holding one
+    [name, predictorType, benchmarks, numEntries] list per configuration where
+      name        comes from GenerateName
+      benchmarks  is the list of (benchmark name, compiler optimization, counter dictionary)
+                  tuples for that log
+      numEntries  comes from ComputePredNumEntries'''
+    database = []
+    for trace in predictorLogs:
+        (logPath, predictorType, predictorParams) = (trace[0], trace[1], trace[2])
+        # raw counters first, then the derived data is added to the same list
+        benchmarks = ProcessFile(logPath)
+        ComputeStats(benchmarks)
+        ComputeGeometricAverage(benchmarks)
+        configName = GenerateName(predictorType, predictorParams)
+        numEntries = ComputePredNumEntries(predictorType, predictorParams)
+        database.append([configName, predictorType, benchmarks, numEntries])
+    return database
+
+def ReorderDataBase(performanceCounterList):
+    # Reorder the data so the benchmark name comes first, then the branch predictor configuration
+    benchmarkFirstList = []
+    for (predictorName, predictorPrefixName, benchmarks, entries) in performanceCounterList:
        for benchmark in benchmarks:
            (nameString, opt, dataDict) = benchmark
-            #print("BENCHMARK")
-            #print(nameString)
-            #print(opt)
-            #print(dataDict)
-            benchmarkAll.append((nameString, opt, config, dataDict))
-    #print('ALL!!!!!!!!!!')
-    #for bench in benchmarkAll:
-    #    print('BENCHMARK')
-    #    print(bench)
-    #print('ALL!!!!!!!!!!')
+            benchmarkFirstList.append((nameString, opt, predictorName, predictorPrefixName, entries, dataDict))
+    return benchmarkFirstList

+def ExtractSelectedData(benchmarkFirstList):
    # now extract all branch prediction direction miss rates for each
    # namestring + opt, config
    benchmarkDict = { }
-    for benchmark in benchmarkAll:
-        (name, opt, config, dataDict) = benchmark
-        if name+'_'+opt in benchmarkDict:
-            benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR']))
+    for benchmark in benchmarkFirstList:
+        (name, opt, config, prefixName, entries, dataDict) = benchmark
+        if opt == 'bd_speedopt_speed': NewName = name+'Sp'
+        elif opt == 'bd_sizeopt_speed': NewName = name+'Sz'
+        else: NewName = name
+        #print(NewName)
+        #NewName = name+'_'+opt
+        if NewName in benchmarkDict:
+            benchmarkDict[NewName].append((config, prefixName, entries, dataDict[ReportPredictorType]))
        else:
-            benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])]
+            benchmarkDict[NewName] = [(config, prefixName, entries, dataDict[ReportPredictorType])]
+    return benchmarkDict

-    size = len(benchmarkDict)
-    index = 1
-    if(summery == 0):
-        #print('Number of plots', size)
+def ReportAsTable(benchmarkDict):
+    refLine = benchmarkDict['Mean']
+    FirstLine = []
+    SecondLine = []
+    for (name, typ, size, val) in refLine:
+        FirstLine.append(name)
+        SecondLine.append(size)

-        for benchmarkName in benchmarkDict:
-            currBenchmark = benchmarkDict[benchmarkName]
-            (names, values) = FormatToPlot(currBenchmark)
-            print(names, values)
-            plt.subplot(6, 7, index)
-            plt.bar(names, values)
-            plt.title(benchmarkName)
-            plt.ylabel('BR Dir Miss Rate (%)')
-            #plt.xlabel('Predictor')
-            index += 1
-    else:
-        combined = benchmarkDict['All_']
-        # merge the reference data into rtl data
-        # combined.extend(RefData)
-        (name, value) = FormatToPlot(combined)
-        lst = []
-        dct = {}
-        category = []
-        length = []
-        accuracy = []
-        for index in range(0, len(name)):
-            match = re.match(r"([a-z]+)([0-9]+)", name[index], re.I)
-            percent = 100 -value[index]
-            if match:
-                (PredType, size) = match.groups()
-                category.append(PredType)
-                length.append(size)
-                accuracy.append(percent)
-                if(PredType not in dct):
-                    dct[PredType] = ([size], [percent])
-                else:
-                    (currSize, currPercent) = dct[PredType]
-                    currSize.append(size)
-                    currPercent.append(percent)
-                    dct[PredType] = (currSize, currPercent)
-        print(dct)
+    sys.stdout.write('benchmark\t\t')
+    for name in FirstLine:
+        if(len(name) < 8): sys.stdout.write('%s\t\t' % name)
+        else: sys.stdout.write('%s\t' % name)        
+    sys.stdout.write('\n')
+    sys.stdout.write('size\t\t\t')
+    for size in SecondLine:
+        if(len(str(size)) < 8): sys.stdout.write('%d\t\t' % size)
+        else: sys.stdout.write('%d\t' % size)        
+    sys.stdout.write('\n')
+
+    if(args.summary):
+        sys.stdout.write('Mean\t\t\t')
+        for (name, typ, size, val) in refLine:
+            sys.stdout.write('%0.2f\t\t' % (val if not args.invert else 100 - val))
+        sys.stdout.write('\n')
+
+    if(not args.summary):
+        for benchmark in benchmarkDict:
+            length = len(benchmark)
+            if(length < 8): sys.stdout.write('%s\t\t\t' % benchmark)
+            elif(length < 16): sys.stdout.write('%s\t\t' % benchmark)
+            else: sys.stdout.write('%s\t' % benchmark)
+            for (name, typ, size, val) in benchmarkDict[benchmark]:
+                sys.stdout.write('%0.2f\t\t' % (val if not args.invert else 100 -val))
+            sys.stdout.write('\n')
+
+def ReportAsText(benchmarkDict):
+    '''Print the selected metric as plain text: a heading per benchmark followed by one
+    "name size value" line per predictor configuration.
+
+    With args.summary only the 'Mean' entry is printed, otherwise every entry of
+    benchmarkDict is.  With args.invert each value is printed as 100 - value.'''
+    if(args.summary): selected = ['Mean']
+    else: selected = benchmarkDict
+    for benchmark in selected:
+        rows = benchmarkDict[benchmark]
+        print(benchmark)
+        for (name, typ, size, val) in rows:
+            shown = 100 - val if args.invert else val
+            sys.stdout.write('%s %s %0.2f\n' % (name, size, shown))
+
+def Inversion(lst):
+    '''Return a new list with every value x of lst replaced by 100 - x when args.invert is
+    set, or copied unchanged when it is not.'''
+    return list(map(lambda x: 100 - x if args.invert else x, lst))
+
+def BarGraph(seriesDict, xlabelList, BenchPerRow, FileName):
+    '''Draw one grouped bar chart and save it to FileName.
+
+    seriesDict maps a series name to its list of values, one value per group; each series
+    becomes one bar in every group.  xlabelList holds the tick label of each group and
+    BenchPerRow is the number of groups drawn.  Values pass through Inversion before
+    plotting and the y axis label follows args.invert.'''
+    NumberInGroup = len(seriesDict)
+    # Bar width: the bars of a group plus the width of 2 bars left empty,
+    # so that one group occupies a width of 1
+    EffectiveNumInGroup = NumberInGroup + 2
+    barWidth = 1 / EffectiveNumInGroup
+    plt.subplots(figsize = (EffectiveNumInGroup*BenchPerRow/8, 4))
+    colors = ['blue'] * 6 + ['black'] * 6
+    for (position, (name, values)) in enumerate(seriesDict.items()):
+        # shift this series right by one bar width per series already drawn
+        xpos = [x + position*barWidth for x in np.arange(BenchPerRow)]
+        plt.bar(xpos, Inversion(values), width=barWidth, edgecolor='grey', label=name, color=colors[position%len(colors)])
+    tickPositions = [r + barWidth*(NumberInGroup/2-0.5) for r in range(0, BenchPerRow)]
+    plt.xticks(tickPositions, xlabelList)
+    plt.xlabel('Benchmark')
+    if(args.invert): plt.ylabel('Prediction Accuracy (%)')
+    else: plt.ylabel('Misprediction Rate (%)')
+    plt.legend(loc='upper left', ncol=2)
+    plt.savefig(FileName)
+
+def SelectPartition(xlabelListBig, seriesDictBig, group, BenchPerRow):
+    '''Cut out the slice of labels and series values belonging to one group.
+
+    Group number group covers positions group*BenchPerRow up to, but not including,
+    (group+1)*BenchPerRow.  Returns (labels, seriesDict) where labels is that slice of
+    xlabelListBig and seriesDict maps every key of seriesDictBig to the same slice of its list.'''
+    first = group*BenchPerRow
+    last = first + BenchPerRow
+    seriesDictTrunk = {key: values[first:last] for (key, values) in seriesDictBig.items()}
+    return (xlabelListBig[first:last], seriesDictTrunk)
+
+
+def ReportAsGraph(benchmarkDict, bar):
+    def FormatToPlot(currBenchmark):
+        names = []
+        sizes = []
+        values = []
+        typs = []
+        for config in currBenchmark:
+            names.append(config[0])
+            sizes.append(config[1])
+            values.append(config[2])
+            typs.append(config[3])
+        return (names, sizes, values, typs)
+    titlesInvert = {'BDMR' : 'Branch Direction Accuracy',
+              'BTMR' : 'Branch Target Accuracy',
+              'RASMPR': 'RAS Accuracy',
+              'ClassMPR': 'Class Prediction Accuracy'}
+    titles = {'BDMR' : 'Branch Direction Misprediction',
+              'BTMR' : 'Branch Target Misprediction',
+              'RASMPR': 'RAS Misprediction',
+              'ClassMPR': 'Class Misprediction'}
+    if(args.summary):
+        markers = ['x', '.', '+', '*', '^', 'o', ',', 's']
+        colors = ['blue', 'black', 'gray', 'dodgerblue', 'lightsteelblue', 'turquoise', 'black', 'blue']
+        temp = benchmarkDict['Mean']
+
+        # the benchmarkDict['Mean'] contains sequencies of results for multiple
+        # branch predictors with various parameterizations
+        # group the parameterizations by the common typ.
+        sequencies = {}
+        for (name, typ, size, value) in benchmarkDict['Mean']:
+            if not typ in sequencies:
+                sequencies[typ] = [(size, value)]
+            else:
+                sequencies[typ].append((size,value))
+        # then graph the common typ as a single line+scatter plot
+        # finally repeat for all typs of branch predictors and overlay
        fig, axes = plt.subplots()
-        marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*', 'tenlocal' : '.', 'eightlocal' : ',', 'fourlocal' : 'x', 'tenlocalahead' : '.', 'eightlocalahead' : ',', 'fourlocalahead' : 'x', 'tenlocalrepair' : 'x'}
-        colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue', 'tenlocal' : 'lightblue', 'eightlocal' : 'lightblue', 'fourlocal' : 'lightblue', 'tenlocalahead' : 'lightblue', 'eightlocalahead' : 'lightblue', 'fourlocalahead' : 'lightblue', 'tenlocalrepair' : 'lightblue'}
-        for cat in dct:
-            (x, y) = dct[cat]
-            x=[int(2**int(v)) for v in x]
-            #print(x, y)
-            print(cat)
-            axes.plot(x,y, color=colors[cat])
-            axes.scatter(x,y, label=cat, marker=marker[cat], color=colors[cat])
-            #plt.scatter(x, y, label=cat)
-            #plt.plot(x, y)
-            #axes.set_xticks([4, 6, 8, 10, 12, 14])
+        index = 0
+        if(args.invert): plt.title(titlesInvert[ReportPredictorType])
+        else: plt.title(titles[ReportPredictorType])
+        for branchPredName in sequencies:
+            data = sequencies[branchPredName]
+            (xdata, ydata) = zip(*data) 
+            if args.invert: ydata = [100 - x for x in ydata]
+            axes.plot(xdata, ydata, color=colors[index])
+            axes.scatter(xdata, ydata, label=branchPredName, color=colors[index], marker=markers[index])
+            index = (index + 1) % len(markers)
        axes.legend(loc='upper left')
        axes.set_xscale("log")
        axes.set_ylabel('Prediction Accuracy')
        axes.set_xlabel('Entries')
-        axes.set_xticks([64, 256, 1024, 4096, 16384, 65536])        
-        axes.set_xticklabels([64, 256, 1024, 4096, 16384, 65536])
+        axes.set_xticks(xdata)
+        axes.set_xticklabels(xdata)
        axes.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
-    plt.show()
+        plt.show()
        

-else:
-    # steps 1 and 2
-    benchmarks = ProcessFile(sys.argv[1])
-    print(benchmarks[0])
-    ComputeAll(benchmarks)
-    ComputeGeometricAverage(benchmarks)
-    # 3 process into useful data
-    # cache hit rates
-    # cache fill time
-    # branch predictor status
-    # hazard counts
-    # CPI
-    # instruction distribution
-    for benchmark in benchmarks:
-        printStats(benchmark)
+    # if(not args.summary):
+    #     size = len(benchmarkDict)
+    #     sizeSqrt = math.sqrt(size)
+    #     isSquare = math.isclose(sizeSqrt, round(sizeSqrt))
+    #     numCol = math.floor(sizeSqrt)
+    #     numRow = numCol + (0 if isSquare else 1)
+    #     index = 1
+    #     fig = plt.figure()
+    #     for benchmarkName in benchmarkDict:
+    #         currBenchmark = benchmarkDict[benchmarkName]
+    #         (names, typs, sizes, values) = FormatToPlot(currBenchmark)
+    #         #axes.plot(numRow, numCol, index)
+    #         ax = fig.add_subplot(numRow, numCol, index)
+    #         ax.bar(names, values)
+    #         ax.title.set_text(benchmarkName)
+    #         #plt.ylabel('BR Dir Miss Rate (%)')
+    #         #plt.xlabel('Predictor')
+    #         index += 1

+    if(not args.summary):
+        size = len(benchmarkDict)
+        sizeSqrt = math.sqrt(size)
+        isSquare = math.isclose(sizeSqrt, round(sizeSqrt))
+        numCol = math.floor(sizeSqrt)
+        numRow = numCol + (0 if isSquare else 1)
+        index = 1
+        BenchPerRow = 7
+
+        xlabelList = []
+        seriesDict = {}
+
+        for benchmarkName in benchmarkDict:
+            currBenchmark = benchmarkDict[benchmarkName]
+            xlabelList.append(benchmarkName)
+            for (name, typ, size, value) in currBenchmark:
+                if(name not in seriesDict):
+                    seriesDict[name] = [value]
+                else:
+                    seriesDict[name].append(value)
+            if(index >= BenchPerRow): break
+            index += 1
+
+        xlabelListBig = []
+        seriesDictBig = {}
+        for benchmarkName in benchmarkDict:
+            currBenchmark = benchmarkDict[benchmarkName]
+            xlabelListBig.append(benchmarkName)
+            for (name, typ, size, value) in currBenchmark:
+                if(name not in seriesDictBig):
+                    seriesDictBig[name] = [value]
+                else:
+                    seriesDictBig[name].append(value)
+
+        #The next step will be to split the benchmarkDict into length BenchPerRow pieces then repeat the following code
+        # on each piece.
+        for row in range(0, math.ceil(39 / BenchPerRow)):
+            (xlabelListTrunk, seriesDictTrunk) = SelectPartition(xlabelListBig, seriesDictBig, row, BenchPerRow)
+            FileName = 'barSegment%d.png' % row
+            groupLen = len(xlabelListTrunk)
+            BarGraph(seriesDictTrunk, xlabelListTrunk, groupLen, FileName)
+
+
+# main
+parser = argparse.ArgumentParser(description='Parses performance counters from a Questa Sim trace to produce a graph or graphs.')
+
+# parse program arguments
+# at most one metric may be selected; direction prediction is reported when none is given
+metric = parser.add_mutually_exclusive_group()
+metric.add_argument('-r', '--ras', action='store_true', help='Plot return address stack (RAS) performance.')
+metric.add_argument('-d', '--direction', action='store_true', help='Plot direction prediction (2-bit, Gshare, local, etc) performance.')
+metric.add_argument('-t', '--target', action='store_true', help='Plot branch target buffer (BTB) performance.')
+metric.add_argument('-c', '--iclass', action='store_true', help='Plot instruction classification performance.')
+
+parser.add_argument('-s', '--summary', action='store_true', help='Show only the geometric average for all benchmarks.')
+parser.add_argument('-b', '--bar', action='store_true', help='Plot graphs.')
+parser.add_argument('-g', '--reference', action='store_true', help='Include the golden reference model from branch-predictor-simulator. Data stored statically at the top of %(prog)s.  If you need to regenreate use CModelBranchAcurracy.sh')
+parser.add_argument('-i', '--invert', action='store_true', help='Invert metric. Example Branch miss prediction becomes prediction accuracy. 100 - miss rate')
+
+# at most one display mode may be selected; gui is used when none is given.
+# Each help string now describes its own mode instead of repeating the --text one.
+displayMode = parser.add_mutually_exclusive_group()
+displayMode.add_argument('--text', action='store_true', help='Display in text format only.')
+displayMode.add_argument('--table', action='store_true', help='Display in table format only.')
+displayMode.add_argument('--gui', action='store_true', help='Display as graphs (default).')
+displayMode.add_argument('--debug', action='store_true', help='Debug mode; no report is produced.')
+parser.add_argument('sources', nargs=1)
+
+args = parser.parse_args()
+
+# Figure what we are reporting
+ReportPredictorType = 'BDMR'  # default, also used for --direction
+if(args.ras): ReportPredictorType = 'RASMPR'
+if(args.target): ReportPredictorType = 'BTMR'
+if(args.iclass): ReportPredictorType = 'ClassMPR'
+
+# Figure how we are displaying the data
+ReportMode = 'gui' # default, so args.gui itself never needs to be read
+if(args.text): ReportMode = 'text'
+if(args.table): ReportMode = 'table'
+if(args.debug): ReportMode = 'debug'
+
+# read the questa sim list file.
+# row, col format.  each row is a questa sim run with performance counters and a particular
+# branch predictor type and size. size can be multiple parameters for more complex predictors like
+# local history and tage.
+# <file> <type> <size>
+predictorLogs = ParseBranchListFile(args.sources[0])          # digests the traces
+performanceCounterList = BuildDataBase(predictorLogs)         # builds a database of performance counters by trace and then by benchmark
+benchmarkFirstList = ReorderDataBase(performanceCounterList)  # reorder first by benchmark then trace
+benchmarkDict = ExtractSelectedData(benchmarkFirstList)       # filters to just the desired performance counter metric
+
+# the reference model only has mean values, so it is merged into the 'Mean' entry alone
+if(args.reference): benchmarkDict['Mean'].extend(RefData)
+#print(benchmarkDict['Mean'])
+#print(benchmarkDict['aha-mont64Speed'])
+#print(benchmarkDict)
+
+# table format
+if(ReportMode == 'table'):
+    ReportAsTable(benchmarkDict)
+
+if(ReportMode == 'text'):
+    ReportAsText(benchmarkDict)
+
+if(ReportMode == 'gui'):
+    ReportAsGraph(benchmarkDict, args.bar)
+
+# ReportMode 'debug' matches none of the reports above
+            
+# *** this is only needed if -b (no -s)
+
+# debug
+#config0 = performanceCounterList[0][0]
+#data0 = performanceCounterList[0][1]
+#bench0 = data0[0]
+#bench0name = bench0[0]
+#bench0data = bench0[2]
+#bench0BrCount = bench0data['Br Count']
+#bench1 = data0[1]
+
+#print(data0)
+#print(bench0)
+#print(bench1)
+
+#print(bench0name)
+#print(bench0BrCount)
--- a/config/buildroot/config.vh
+++ b/config/buildroot/config.vh
@ -142,6 +142,7 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_SIZE = 32'd10;
 localparam BPRED_NUM_LHR = 32'd6;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;


 localparam SVADU_SUPPORTED = 1;
--- a/config/fpga/config.vh
+++ b/config/fpga/config.vh
@ -156,6 +156,7 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_NUM_LHR = 32'd6;
 localparam BPRED_SIZE = 32'd12;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;

 localparam SVADU_SUPPORTED = 1;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/rv32e/config.vh
+++ b/config/rv32e/config.vh
@ -144,6 +144,7 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_SIZE = 32'd10;
 localparam BPRED_NUM_LHR = 32'd6;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;

 localparam SVADU_SUPPORTED = 0;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/rv32gc/config.vh
+++ b/config/rv32gc/config.vh
@ -150,7 +150,13 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_SIZE = 32'd10;
 `endif
 localparam BPRED_NUM_LHR = 32'd6;
+// BTB and RAS sizes can each be overridden independently from the simulator command line:
+// +define+BTB_OVERRIDE together with +define+BTB_SIZE=<n>, and
+// +define+RAS_OVERRIDE together with +define+RAS_SIZE=<n>.
+// Keeping the two guards separate lets a BTB-only sweep run without also defining `RAS_SIZE.
+`ifdef BTB_OVERRIDE
+localparam BTB_SIZE = `BTB_SIZE;
+`else
 localparam BTB_SIZE = 32'd10;
+`endif
+`ifdef RAS_OVERRIDE
+localparam RAS_SIZE = `RAS_SIZE;
+`else
+localparam RAS_SIZE = 32'd16;
+`endif

 localparam SVADU_SUPPORTED = 1;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/rv32i/config.vh
+++ b/config/rv32i/config.vh
@ -144,6 +144,7 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_SIZE = 32'd10;
 localparam BPRED_NUM_LHR = 32'd6;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;

 localparam SVADU_SUPPORTED = 0;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/rv32imc/config.vh
+++ b/config/rv32imc/config.vh
@ -143,6 +143,7 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_SIZE = 32'd10;
 localparam BPRED_NUM_LHR = 32'd6;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;

 localparam SVADU_SUPPORTED = 0;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/rv64fpquad/config.vh
+++ b/config/rv64fpquad/config.vh
@ -146,6 +146,7 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_SIZE = 32'd10;
 localparam BPRED_NUM_LHR = 32'd6;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;

 localparam SVADU_SUPPORTED = 0;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/rv64gc/config.vh
+++ b/config/rv64gc/config.vh
@ -147,8 +147,9 @@ localparam PLIC_SDC_ID = 32'd9;
 localparam BPRED_SUPPORTED = 1;
 localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
 localparam BPRED_NUM_LHR = 32'd6;
-localparam BPRED_SIZE = 32'd10;
+localparam BPRED_SIZE = 32'd6;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;

 localparam SVADU_SUPPORTED = 1;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/rv64i/config.vh
+++ b/config/rv64i/config.vh
@ -146,6 +146,7 @@ localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BAS
 localparam BPRED_SIZE = 32'd10;
 localparam BPRED_NUM_LHR = 32'd6;
 localparam BTB_SIZE = 32'd10;
+localparam RAS_SIZE = 32'd16;

 localparam SVADU_SUPPORTED = 0;
 localparam ZMMUL_SUPPORTED = 0;
--- a/config/shared/parameter-defs.vh
+++ b/config/shared/parameter-defs.vh
@ -89,6 +89,7 @@ localparam cvw_t P = '{
  BPRED_SIZE :        BPRED_SIZE,
  BPRED_NUM_LHR : BPRED_NUM_LHR,                       
  BTB_SIZE :        BTB_SIZE,
+  RAS_SIZE :        RAS_SIZE,
  RADIX :        RADIX,
  DIVCOPIES :        DIVCOPIES,
  ZBA_SUPPORTED :        ZBA_SUPPORTED,
--- a/linux/buildroot-config-src/buildroot-2023.05.1/main.config
+++ b/linux/buildroot-config-src/buildroot-2023.05.1/main.config
--- a/linux/devicetree/wally-artya7.dts
+++ b/linux/devicetree/wally-artya7.dts
@ -9,7 +9,7 @@
 	chosen {
 		linux,initrd-end = <0x85c43a00>;
 		linux,initrd-start = <0x84200000>;
-		bootargs = "root=/dev/vda ro";
+		bootargs = "root=/dev/vda ro  console=ttyS0,115200";
 		stdout-path = "/soc/uart@10000000";
 	};

--- a/sim/bp-results/branch-list.txt
+++ b/sim/bp-results/branch-list.txt
@ -0,0 +1,12 @@
+gshare6.log gshare 6
+gshare8.log gshare 8
+gshare10.log gshare 10
+gshare12.log gshare 12
+gshare14.log gshare 14
+gshare16.log gshare 16
+twobit6.log twobit 6
+twobit8.log twobit 8
+twobit10.log twobit 10
+twobit12.log twobit 12
+twobit14.log twobit 14
+twobit16.log twobit 16
--- a/sim/bp-results/btb-list.txt
+++ b/sim/bp-results/btb-list.txt
@ -0,0 +1,6 @@
+btb6.log btb 6
+btb8.log btb 8
+btb10.log btb 10
+btb12.log btb 12
+btb14.log btb 14
+btb16.log btb 16
--- a/sim/bp-results/class-list.txt
+++ b/sim/bp-results/class-list.txt
@ -0,0 +1,6 @@
+class6.log class 6
+class8.log class 8
+class10.log class 10
+class12.log class 12
+class14.log class 14
+class16.log class 16
--- a/sim/bp-results/ras-list.txt
+++ b/sim/bp-results/ras-list.txt
@ -0,0 +1,5 @@
+ras3.log ras 3
+ras4.log ras 4
+ras6.log ras 6
+ras10.log ras 10
+ras16.log ras 16
--- a/sim/bpred-sim.py
+++ b/sim/bpred-sim.py
@ -46,18 +46,40 @@ configs = [
    )
 ]

-bpdSize = [6, 8, 10, 12, 14, 16]
-bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
-for CurrBPType in bpdType:
-    for CurrBPSize in bpdSize:
-        name = CurrBPType+str(CurrBPSize)
-        configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=" + str(bpdType.index(CurrBPType)) + "+define+BPRED_SIZE=" + str(CurrBPSize)
-        tc = TestCase(
-            name=name,
-            variant="rv32gc",
-            cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
-            grepstr="")
-        configs.append(tc)
+# bpdSize = [6, 8, 10, 12, 14, 16]
+# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
+# for CurrBPType in bpdType:
+#     for CurrBPSize in bpdSize:
+#         name = CurrBPType+str(CurrBPSize)
+#         configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=" + str(bpdType.index(CurrBPType)) + "+define+BPRED_SIZE=" + str(CurrBPSize)
+#         tc = TestCase(
+#             name=name,
+#             variant="rv32gc",
+#             cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
+#             grepstr="")
+#         configs.append(tc)
+
+# bpdSize = [6, 8, 10, 12, 14, 16]
+# for CurrBPSize in bpdSize:
+#     name = 'BTB'+str(CurrBPSize)
+#     configOptions = "+define+INSTR_CLASS_PRED=1 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE"
+#     tc = TestCase(
+#         name=name,
+#         variant="rv32gc",
+#         cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
+#         grepstr="")
+#     configs.append(tc)
+
+bpdSize = [2, 3, 4, 6, 10, 16]
+for CurrBPSize in bpdSize:
+    name = 'RAS'+str(CurrBPSize)
+    configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=16" + "+define+RAS_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE+define+RAS_OVERRIDE"
+    tc = TestCase(
+        name=name,
+        variant="rv32gc",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do  rv32gc configOptions " + name + " embench " + configOptions,
+        grepstr="")
+    configs.append(tc)
    
 # bpdSize = [6, 8, 10, 12, 14, 16]
 # LHRSize = [4, 8, 10]
--- a/sim/wave.do
+++ b/sim/wave.do
@ -299,7 +299,15 @@ add wave -noupdate -group {WriteBack stage} /testbench/InstrW
 add wave -noupdate -group {WriteBack stage} /testbench/InstrWName
 add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRM
 add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} -label PHT /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/mem
-add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} -divider {class check}
+add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[5]}
+add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[4]}
+add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[3]}
+add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[2]}
+add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[1]}
+add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[0]}
+add wave -noupdate -expand -group Bpred -expand -group RAS -expand /testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory
+add wave -noupdate -expand -group Bpred -expand -group RAS /testbench/dut/core/ifu/bpred/bpred/RASPredictor/Ptr
+add wave -noupdate -expand -group Bpred -divider {class check}
 add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
 add wave -noupdate -expand -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
 add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
@ -578,31 +586,31 @@ add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/c
 add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/core/ifu/immu/immu/tlb/tlb/tlbcam/camlines[19]/Key1}
 add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/core/ifu/immu/immu/tlb/tlb/tlbcam/camlines[19]/Query0}
 add wave -noupdate -group ifu -group itlb -expand -group key19 {/testbench/dut/core/ifu/immu/immu/tlb/tlb/tlbcam/camlines[19]/Query1}
-add wave -noupdate -expand -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]}
-add wave -noupdate -expand -group {Performance Counters} -label MINSTRET -radix hexadecimal {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Branch -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {Jump (Not Return)} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[4]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label Return -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[5]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[6]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BTA Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[8]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[9]}
-add wave -noupdate -expand -group {Performance Counters} -expand -group BP -label {BP CLASS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[10]}
-add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Access} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
-add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
-add wave -noupdate -expand -group {Performance Counters} -group ICACHE -label {I Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
-add wave -noupdate -expand -group {Performance Counters} -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
-add wave -noupdate -expand -group {Performance Counters} -group Privileged -label {CSR Write} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[19]}
-add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Fence.I {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[20]}
-add wave -noupdate -expand -group {Performance Counters} -group Privileged -label sfence.VMA {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[21]}
-add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Interrupt {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[22]}
-add wave -noupdate -expand -group {Performance Counters} -group Privileged -label Exception {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[23]}
-add wave -noupdate -expand -group {Performance Counters} -label {FDiv or IDiv Cycles} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[24]}
-add wave -noupdate -expand -group {Performance Counters} /testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW
+add wave -noupdate -group {Performance Counters} -label MCYCLE -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[0]}
+add wave -noupdate -group {Performance Counters} -label MINSTRET -radix hexadecimal {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[2]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label Branch -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[3]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[7]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label {Jump (Not Return)} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[4]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label Return -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[5]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label {BP Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[6]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label {BTA Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[8]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[9]}
+add wave -noupdate -group {Performance Counters} -expand -group BP -label {BP CLASS WRONG} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[10]}
+add wave -noupdate -group {Performance Counters} -group ICACHE -label {I Cache Access} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[16]}
+add wave -noupdate -group {Performance Counters} -group ICACHE -label {I Cache Miss} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[17]}
+add wave -noupdate -group {Performance Counters} -group ICACHE -label {I Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[18]}
+add wave -noupdate -group {Performance Counters} -group DCACHE -label {Load Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[11]}
+add wave -noupdate -group {Performance Counters} -group DCACHE -label {Store Stall} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[12]}
+add wave -noupdate -group {Performance Counters} -group DCACHE -label {DCACHE MISS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[14]}
+add wave -noupdate -group {Performance Counters} -group DCACHE -label {DCACHE ACCESS} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[13]}
+add wave -noupdate -group {Performance Counters} -group DCACHE -label {D Cache Miss Cycles} -radix unsigned {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[15]}
+add wave -noupdate -group {Performance Counters} -group Privileged -label {CSR Write} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[19]}
+add wave -noupdate -group {Performance Counters} -group Privileged -label Fence.I {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[20]}
+add wave -noupdate -group {Performance Counters} -group Privileged -label sfence.VMA {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[21]}
+add wave -noupdate -group {Performance Counters} -group Privileged -label Interrupt {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[22]}
+add wave -noupdate -group {Performance Counters} -group Privileged -label Exception {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[23]}
+add wave -noupdate -group {Performance Counters} -label {FDiv or IDiv Cycles} {/testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW[24]}
+add wave -noupdate -group {Performance Counters} /testbench/dut/core/priv/priv/csr/counters/counters/HPMCOUNTER_REGW
 add wave -noupdate -group {ifu } -color Gold /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/CurrState
 add wave -noupdate -group {ifu } /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/HREADY
 add wave -noupdate -group {ifu } /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/FetchBuffer
@ -677,7 +685,7 @@ add wave -noupdate /testbench/dut/core/fpu/fpu/fctrl/IllegalFPUInstrD
 add wave -noupdate /testbench/dut/core/fpu/fpu/fctrl/STATUS_FS
 add wave -noupdate /testbench/dut/core/priv/priv/csr/csrsr/STATUS_FS_INT
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 4} {172636 ns} 1} {{Cursor 4} {5101 ns} 0} {{Cursor 3} {152766 ns} 1}
+WaveRestoreCursors {{Cursor 4} {172636 ns} 1} {{Cursor 4} {111958 ns} 0} {{Cursor 3} {152766 ns} 1}
 quietly wave cursor active 2
 configure wave -namecolwidth 250
 configure wave -valuecolwidth 194
@ -693,4 +701,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {4326 ns} {6929 ns}
+WaveRestoreZoom {37879604 ns} {38203328 ns}
--- a/src/cvw.sv
+++ b/src/cvw.sv
@ -149,6 +149,7 @@ typedef struct packed {
  int                  BPRED_NUM_LHR;
  int                  BPRED_SIZE;
  int                  BTB_SIZE;
+  int                  RAS_SIZE;

 // FPU division architecture
  int           RADIX;
--- a/src/ifu/bpred/RASPredictor.sv
+++ b/src/ifu/bpred/RASPredictor.sv
@ -27,8 +27,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-module RASPredictor import cvw::*;  #(parameter cvw_t P, 
-                                      parameter StackSize = 16 )(
+module RASPredictor import cvw::*;  #(parameter cvw_t P)(
  input  logic             clk,
  input  logic             reset, 
  input  logic             StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
@ -41,10 +40,10 @@ module RASPredictor import cvw::*;  #(parameter cvw_t P,
   );

  logic                     CounterEn;
-  localparam Depth = $clog2(StackSize);
+  localparam Depth = $clog2(P.RAS_SIZE);

  logic [Depth-1:0]         NextPtr, Ptr, P1, M1, IncDecPtr;
-  logic [StackSize-1:0]     [P.XLEN-1:0] memory;
+  logic [P.RAS_SIZE-1:0]     [P.XLEN-1:0] memory;
  integer        index;

  logic      PopF;
@ -76,14 +75,20 @@ module RASPredictor import cvw::*;  #(parameter cvw_t P,
  assign P1 = 1;
  assign M1 = '1; // -1
  mux2 #(Depth) PtrMux(P1, M1, DecrementPtr, IncDecPtr);
-  assign NextPtr = Ptr + IncDecPtr;
+  logic [Depth-1:0] Sum;
+  assign Sum = Ptr + IncDecPtr;
+  if(|P.RAS_SIZE[Depth-1:0]) // stack size is not a power of 2: wrap explicitly. Incrementing past the last entry goes to 0; decrementing from 0 (Sum underflows to all ones) goes to the last entry
+    assign NextPtr = Sum >= P.RAS_SIZE[Depth-1:0] ? (DecrementPtr ? P.RAS_SIZE[Depth-1:0] - 1'b1 : '0) : Sum;
+  else
+    assign NextPtr = Sum; // power of 2: the Depth-bit sum already wraps in both directions
+  //assign NextPtr = Ptr + IncDecPtr;

  flopenr #(Depth) PTR(clk, reset, CounterEn, NextPtr, Ptr);

  // RAS must be reset. 
  always_ff @ (posedge clk) begin
    if(reset) begin
-      for(index=0; index<StackSize; index++)
+      for(index=0; index<P.RAS_SIZE; index++)
    memory[index] <= {P.XLEN{1'b0}};
    end else if(PushE) begin
      memory[NextPtr] <= #1 PCLinkE;