diff --git a/bin/CModelBranchAccuracy.sh b/bin/CModelBranchAccuracy.sh new file mode 100755 index 000000000..1b94f7c9a --- /dev/null +++ b/bin/CModelBranchAccuracy.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +########################################### +## Written: ross1728@gmail.com +## Created: 12 March 2023 +## Modified: +## +## Purpose: Takes a directory of branch outcomes organized as one file per benchmark. +## Computes the geometric mean. +## +## A component of the CORE-V-WALLY configurable RISC-V project. +## +## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +## +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +## +## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +## except in compliance with the License, or, at your option, the Apache License version 2.0. You +## may obtain a copy of the License at +## +## https://solderpad.org/licenses/SHL-2.1/ +## +## Unless required by applicable law or agreed to in writing, any work distributed under the +## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +## either express or implied. See the License for the specific language governing permissions +## and limitations under the License. 
+################################################################################################ + + +Directory="$1" +Files="$1/*.log" + +for Pred in "bimodal" "gshare" +do + for Size in $(seq 6 2 16) + do + if [ $Pred = "gshare" ]; then + SizeString="$Size $Size 18 1" + elif [ $Pred = "bimodal" ]; then + SizeString="$Size 18 1" + fi + + Product=1.0 + Count=0 + for File in $Files + do + #echo "sim_bp $Pred $Size $Size 18 1 $File | tail -1 | awk '{print $4}'" + #echo "sim_bp $Pred $SizeString $File | tail -1 | awk '{print $4}'" + BMDR=`sim_bp $Pred $SizeString $File | tail -1 | awk '{print $4}'` + Product=`echo "$Product * $BMDR" | bc` + Count=$((Count+1)) + done + + GeoMean=`perl -E "say $Product**(1/$Count)"` + echo "$Pred$Size $GeoMean" + done +done diff --git a/bin/SeparateBranch.sh b/bin/SeparateBranch.sh new file mode 100755 index 000000000..c5ebb5de0 --- /dev/null +++ b/bin/SeparateBranch.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +########################################### +## Written: ross1728@gmail.com +## Created: 12 March 2023 +## Modified: +## +## Purpose: Converts a single branch.log containing multiple benchmark branch outcomes into +## separate files, one for each program. +## +## A component of the CORE-V-WALLY configurable RISC-V project. +## +## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +## +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +## +## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +## except in compliance with the License, or, at your option, the Apache License version 2.0. You +## may obtain a copy of the License at +## +## https://solderpad.org/licenses/SHL-2.1/ +## +## Unless required by applicable law or agreed to in writing, any work distributed under the +## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +## either express or implied. 
See the License for the specific language governing permissions +## and limitations under the License. +################################################################################################ + +File="$1" +TrainLineNumbers=`cat $File | grep -n "TRAIN" | awk -NF ':' '{print $1}'` +BeginLineNumbers=`cat $File | grep -n "BEGIN" | awk -NF ':' '{print $1}'` +Name=`cat $File | grep -n "BEGIN" | awk -NF '/' '{print $6_$4}'` +EndLineNumbers=`cat $File | grep -n "END" | awk -NF ':' '{print $1}'` +echo $Name +echo $BeginLineNumbers +echo $EndLineNumbers + +NameArray=($Name) +TrainLineNumberArray=($TrainLineNumbers) +BeginLineNumberArray=($BeginLineNumbers) +EndLineNumberArray=($EndLineNumbers) + +mkdir -p branch +Length=${#EndLineNumberArray[@]} +for i in $(seq 0 1 $((Length-1))) +do + CurrName=${NameArray[$i]} + CurrTrain=$((${TrainLineNumberArray[$i]}+1)) + CurrEnd=$((${EndLineNumberArray[$i]}-1)) + echo $CurrName, $CurrTrain, $CurrEnd + sed -n "${CurrTrain},${CurrEnd}p" $File > branch/${CurrName}_branch.log +done diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 3229d1c74..5b5e0d98b 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -30,6 +30,18 @@ import sys import matplotlib.pyplot as plt import re +#RefData={'twobitCModel' :(['6', '8', '10', '12', '14', '16'], +# [11.0680836450622, 8.53864970807778, 7.59565430177984, 6.38741598498948, 5.83662961500838, 5.83662961500838]), +# 'gshareCModel' : (['6', '8', '10', '12', '14', '16'], +# [14.5859173702079, 12.3634674403619, 10.5806018170154, 8.38831266973592, 6.37097544620762, 3.52638362703015]) +#} + +RefData = [('twobitCModel6', 11.0501534891674), ('twobitCModel8', 8.51829052266352), ('twobitCModel10', 7.56775222626483), + ('twobitCModel12', 6.31366834586515), ('twobitCModel14', 5.72699936834177), ('twobitCModel16', 5.72699936834177), + ('gshareCModel6', 14.5731555979574), ('gshareCModel8', 12.3155658100497), ('gshareCModel10', 10.4589596630561), + ('gshareCModel12', 8.25796055444401), ('gshareCModel14', 
6.23093702707613), ('gshareCModel16', 3.34001125650374)] + + def ComputeCPI(benchmark): 'Computes and inserts CPI into benchmark stats.' (nameString, opt, dataDict) = benchmark @@ -221,14 +233,15 @@ if(sys.argv[1] == '-b'): for benchmark in benchmarkAll: (name, opt, config, dataDict) = benchmark if name+'_'+opt in benchmarkDict: - benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR'])) + benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) else: - benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])] + benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] size = len(benchmarkDict) index = 1 if(summery == 0): #print('Number of plots', size) + for benchmarkName in benchmarkDict: currBenchmark = benchmarkDict[benchmarkName] (names, values) = FormatToPlot(currBenchmark) @@ -241,6 +254,8 @@ if(sys.argv[1] == '-b'): index += 1 else: combined = benchmarkDict['All_'] + # merge the reference data into rtl data + combined.extend(RefData) (name, value) = FormatToPlot(combined) lst = [] dct = {} @@ -264,8 +279,8 @@ if(sys.argv[1] == '-b'): dct[PredType] = (currSize, currPercent) print(dct) fig, axes = plt.subplots() - marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x'} - colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue'} + marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'} + colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'} for cat in dct: (x, y) = dct[cat] x=[int(2**int(v)) for v in x] diff --git a/bin/sim_bp b/bin/sim_bp new file mode 120000 index 000000000..a85da9901 --- /dev/null +++ b/bin/sim_bp @@ -0,0 +1 @@ 
+../addins/branch-predictor-simulator/src/sim_bp \ No newline at end of file diff --git a/sim/regression-wally b/sim/regression-wally index e7ce0d302..560f7795b 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -126,8 +126,7 @@ for test in ahbTests: grepstr="All tests ran without failures") configs.append(tc) -#tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"] -tests64gc = ["arch64i", "arch64c", "arch64m"] +tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"] if (coverage): # delete all but 64gc tests when running coverage configs = [] coverStr = '-coverage' diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 72b97a313..de3b8d711 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -29,7 +29,7 @@ `include "wally-config.vh" -module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) ( +module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) ( input logic clk, input logic reset, input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY @@ -39,7 +39,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE input logic [1:0] CacheAtomic, // Atomic operation input logic FlushCache, // Flush all dirty lines back to memory input logic InvalidateCache, // Clear all valid bits - input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits. + input logic [11:0] NextSet, // Virtual address, but we only use the lower 12 bits. 
input logic [`PA_BITS-1:0] PAdr, // Physical address input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only) input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only) @@ -50,7 +50,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE output logic CacheMiss, // Cache miss output logic CacheAccess, // Cache access // lsu control - input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr + input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextSet // Bus fsm interface input logic CacheBusAck, // Bus operation completed input logic SelBusBeat, // Word in cache line comes from BeatCount @@ -74,7 +74,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE logic SelAdr; logic [1:0] AdrSelMuxSel; - logic [SETLEN-1:0] CAdr; + logic [SETLEN-1:0] CacheSet; logic [LINELEN-1:0] LineWriteData; logic ClearValid, ClearDirty, SetDirty, SetValid; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; @@ -106,24 +106,24 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Read Path ///////////////////////////////////////////////////////////////////////////////////////////// - // Choose read address (CAdr). Normally use NextAdr, but use PAdr during stalls + // Choose read address (CacheSet). Normally use NextSet, but use PAdr during stalls // and FlushAdr when handling D$ flushes // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap // sets PCNextF to XTVEC and the icache must start reading the instruction. 
- assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}; - mux3 #(SETLEN) AdrSelMux(NextAdr[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr, - AdrSelMuxSel, CAdr); + assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))}; + mux3 #(SETLEN) AdrSelMux(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr, + AdrSelMuxSel, CacheSet); // Array of cache ways, along with victim, hit, dirty, and read merging logic - cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0]( - .clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask, + cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( + .clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches if(NUMWAYS > 1) begin:vict cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU( - .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage), + .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CacheSet, .LRUWriteEn(LRUWriteEn & ~FlushStage), .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache); end else assign VictimWay = 1'b1; // one hot. 
@@ -138,7 +138,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag)); // Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines - if(DCACHE) + if(!READ_ONLY_CACHE) mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), .d1(BeatCount), .s(SelBusBeat), .y(WordOffsetAddr)); diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 2e3057f0c..05e26f4bf 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -37,7 +37,7 @@ module cacheLRU input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant input logic [NUMWAYS-1:0] HitWay, // Which way is valid and matches PAdr's tag input logic [NUMWAYS-1:0] ValidWay, // Which ways for a particular set are valid, ignores tag - input logic [SETLEN-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [SETLEN-1:0] CacheSet, // Cache address, the output of the address select mux, NextSet, PAdr, or FlushAdr input logic [SETLEN-1:0] PAdr, // Physical address input logic LRUWriteEn, // Update the LRU state input logic SetValid, // Set the dirty bit in the selected way and set @@ -124,8 +124,7 @@ module cacheLRU // LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice. // This is a two port memory. - // Every cycle must read from CAdr and each load/store must write the new LRU. - // this is still wrong.*************************** + // Every cycle must read from CacheSet and each load/store must write the new LRU. 
always_ff @(posedge clk) begin if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; if(CacheEn) begin @@ -133,10 +132,10 @@ module cacheLRU else if (LRUWriteEn & ~FlushStage) begin LRUMemory[PAdr] <= NextLRU; end - if(LRUWriteEn & ~FlushStage & (PAdr == CAdr)) + if(LRUWriteEn & ~FlushStage & (PAdr == CacheSet)) CurrLRU <= #1 NextLRU; else - CurrLRU <= #1 LRUMemory[CAdr]; + CurrLRU <= #1 LRUMemory[CacheSet]; end end diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 671bbcaff..da40ab705 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -30,12 +30,12 @@ `include "wally-config.vh" module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, - OFFSETLEN = 5, INDEXLEN = 9, DIRTY_BITS = 1) ( + OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) ( input logic clk, input logic reset, input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant - input logic [$clog2(NUMLINES)-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr input logic [`PA_BITS-1:0] PAdr, // Physical address input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only) input logic SetValid, // Set the dirty bit in the selected way and set @@ -114,7 +114,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, ///////////////////////////////////////////////////////////////////////////////////////////// ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn), - .addr(CAdr), .dout(ReadTag), .bwe('1), + .addr(CacheSet), .dout(ReadTag), .bwe('1), .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); @@ -136,7 +136,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, localparam 
LOGNUMSRAM = $clog2(NUMSRAM); for(words = 0; words < NUMSRAM; words++) begin: word - ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CAdr), + ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet), .dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), .din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), .we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); @@ -152,9 +152,9 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset) ValidBits <= #1 '0; if(CacheEn) begin - ValidWay <= #1 ValidBits[CAdr]; + ValidWay <= #1 ValidBits[CacheSet]; if(InvalidateCache) ValidBits <= #1 '0; - else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay; + else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CacheSet] <= #1 SetValidWay; end end @@ -163,13 +163,13 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, ///////////////////////////////////////////////////////////////////////////////////////////// // Dirty bits - if (DIRTY_BITS) begin:dirty + if (!READ_ONLY_CACHE) begin:dirty always_ff @(posedge clk) begin // reset is optional. Consider merging with TAG array in the future. 
//if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; if(CacheEn) begin - Dirty <= #1 DirtyBits[CAdr]; - if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CAdr] <= #1 SetDirtyWay; + Dirty <= #1 DirtyBits[CacheSet]; + if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CacheSet] <= #1 SetDirtyWay; end end end else assign Dirty = 1'b0; diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 85d23d373..cf3a22c1f 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -43,7 +43,7 @@ module hazard ( ); logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause; - logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW; + logic LatestUnstalledD, LatestUnstalledE, LatestUnstalledM, LatestUnstalledW; logic FlushDCause, FlushECause, FlushMCause, FlushWCause; // stalls and flushes @@ -95,14 +95,14 @@ module hazard ( assign #1 StallW = StallWCause; // detect the first stage that is not stalled - assign FirstUnstalledD = ~StallD & StallF; - assign FirstUnstalledE = ~StallE & StallD; - assign FirstUnstalledM = ~StallM & StallE; - assign FirstUnstalledW = ~StallW & StallM; + assign LatestUnstalledD = ~StallD & StallF; + assign LatestUnstalledE = ~StallE & StallD; + assign LatestUnstalledM = ~StallM & StallE; + assign LatestUnstalledW = ~StallW & StallM; // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush - assign #1 FlushD = FirstUnstalledD | FlushDCause; - assign #1 FlushE = FirstUnstalledE | FlushECause; - assign #1 FlushM = FirstUnstalledM | FlushMCause; - assign #1 FlushW = FirstUnstalledW | FlushWCause; + assign #1 FlushD = LatestUnstalledD | FlushDCause; + assign #1 FlushE = LatestUnstalledE | FlushECause; + assign #1 FlushM = LatestUnstalledM | FlushMCause; + assign #1 FlushW = LatestUnstalledW | FlushWCause; endmodule diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index f2f16b51e..97aec3868 100644 --- a/src/ifu/bpred/bpred.sv +++ 
b/src/ifu/bpred/bpred.sv @@ -71,7 +71,7 @@ module bpred ( logic [1:0] BPDirPredF; - logic [`XLEN-1:0] BTAF, RASPCF; + logic [`XLEN-1:0] BPBTAF, RASPCF; logic BPPCWrongE; logic IClassWrongE; logic BPDirPredWrongE; @@ -85,7 +85,7 @@ module bpred ( logic BTBTargetWrongE; logic RASTargetWrongE; - logic [`XLEN-1:0] BTAD; + logic [`XLEN-1:0] BPBTAD; logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF; logic BPBranchF, BPJumpF, BPReturnF, BPCallF; @@ -95,7 +95,7 @@ module bpred ( logic BranchM, JumpM, ReturnM, CallM; logic BranchW, JumpW, ReturnW, CallW; logic BPReturnWrongD; - logic [`XLEN-1:0] BTAE; + logic [`XLEN-1:0] BPBTAE; @@ -150,7 +150,7 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, - .BTAF, .BTAD, .BTAE, + .BPBTAF, .BPBTAD, .BPBTAE, .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), .IClassWrongM, .IClassWrongE, .IEUAdrE, .IEUAdrM, @@ -181,7 +181,7 @@ module bpred ( // Output the predicted PC or corrected PC on miss-predict. assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF; - mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF); + mux2 #(`XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF); // Selects the BP or PC+2/4. mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF); // If the prediction is wrong select the correct address. @@ -196,7 +196,7 @@ module bpred ( if(`ZICOUNTERS_SUPPORTED) begin logic [`XLEN-1:0] RASPCD, RASPCE; - logic BTBPredPCWrongE, RASPredPCWrongE; + logic BTAWrongE, RASPredPCWrongE; // performance counters // 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) @@ -207,14 +207,14 @@ module bpred ( // could be wrong or the fall through address selected for branch predict not taken. 
// By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of // both without the above inaccuracies. - // **** use BTAWrongM from BTB. - assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; + // **** use BPBTAWrongM from BTB. + assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, - {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, + {BPDirPredWrongE, BTAWrongE, RASPredPCWrongE}, {BPDirPredWrongM, BTAWrongM, RASPredPCWrongM}); end else begin diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index b14399704..ab11b48be 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -35,9 +35,9 @@ module btb #(parameter Depth = 10 ) ( input logic reset, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages - output logic [`XLEN-1:0] BTAF, // BTB's guess at PC - output logic [`XLEN-1:0] BTAD, - output logic [`XLEN-1:0] BTAE, + output logic [`XLEN-1:0] BPBTAF, // BTB's guess at PC + output logic [`XLEN-1:0] BPBTAD, + output logic [`XLEN-1:0] BPBTAE, output logic [3:0] BTBIClassF, // BTB's guess at instruction class // update input logic IClassWrongM, // BTB's instruction class guess was wrong @@ -57,8 +57,8 @@ module btb #(parameter Depth = 10 ) ( logic [`XLEN+3:0] TableBTBPredF; logic [`XLEN-1:0] IEUAdrW; logic [`XLEN-1:0] PCW; - logic BTBWrongE, BTAWrongE; - logic BTBWrongM, BTAWrongM; + logic BTBWrongE, BPBTAWrongE; + logic BTBWrongM, BPBTAWrongM; // hashing function for indexing the PC @@ -84,12 +84,12 @@ module btb #(parameter Depth = 10 ) ( assign MatchW = PCFIndex == PCWIndex; assign 
MatchX = MatchD | MatchE | MatchM | MatchW; - assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} : + assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BPBTAD} : MatchE ? {InstrClassE, IEUAdrE} : MatchM ? {InstrClassM, IEUAdrM} : {InstrClassW, IEUAdrW} ; - assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; + assign {BTBIClassF, BPBTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF}; // An optimization may be using a PC relative address. @@ -97,16 +97,16 @@ module btb #(parameter Depth = 10 ) ( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); - flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); + flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD); - // BTAE is not strickly necessary. However it is used by two parts of wally. + // BPBTAE is not strictly necessary. However it is used by two parts of wally. // 1. It gates updates to the BTB when the prediction does not change. This save power. - // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. + // 2. BPBTAWrongE is used by the performance counters to track when the BTB's BPBTA or instruction class is wrong. 
+ flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE); + assign BPBTAWrongE = (BPBTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); - flopenrc #(1) BTAWrongMReg(clk, reset, FlushM, ~StallM, BTAWrongE, BTAWrongM); - assign BTBWrongM = BTAWrongM | IClassWrongM; + flopenrc #(1) BPBTAWrongMReg(clk, reset, FlushM, ~StallM, BPBTAWrongE, BPBTAWrongM); + assign BTBWrongM = BPBTAWrongM | IClassWrongM; flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW); diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 41b0de00e..d8d48cbf4 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -233,7 +233,7 @@ module ifu ( assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0; cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), - .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) + .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1)) icache(.clk, .reset, .FlushStage(FlushD), .Stall(GatedStallD), .FetchBuffer, .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), @@ -245,7 +245,7 @@ module ifu ( .CacheWriteData('0), .CacheRW(CacheRWF), .CacheAtomic('0), .FlushCache('0), - .NextAdr(PCSpillNextF[11:0]), + .NextSet(PCSpillNextF[11:0]), .PAdr(PCPF), .CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM)); ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 91ad694e9..628c85bbd 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -264,9 +264,9 @@ module lsu ( assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW); cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), - .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .DCACHE(1)) dcache( + 
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), - .FlushCache(FlushDCache), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM), + .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), diff --git a/testbench/testbench.sv b/testbench/testbench.sv index b0af190a9..fe3875cbb 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -69,6 +69,7 @@ logic [3:0] dummy; logic DCacheFlushDone, DCacheFlushStart; logic riscofTest; + logic StartSample, EndSample; flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW); flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW); @@ -405,8 +406,7 @@ logic [3:0] dummy; integer HPMCindex; logic StartSampleFirst; logic StartSampleDelayed; - logic StartSample; - logic EndSample, EndSampleFirst, EndSampleDelayed; + logic EndSampleFirst, EndSampleDelayed; logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0]; string HPMCnames[] = '{"Mcycle", @@ -544,15 +544,23 @@ logic [3:0] dummy; string direction; int file; logic PCSrcM; + string LogFile; + logic resetD, resetEdge; flopenrc #(1) PCSrcMReg(clk, reset, dut.core.FlushM, ~dut.core.StallM, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PCSrcE, PCSrcM); + flop #(1) ResetDReg(clk, reset, resetD); + assign resetEdge = ~reset & resetD; initial begin - file = $fopen("branch.log", "w"); + LogFile = $psprintf("branch_%s%0d.log", `BPRED_TYPE, `BPRED_SIZE); + file = $fopen(LogFile, "w"); end always @(posedge clk) begin + if(resetEdge) $fwrite(file, "TRAIN\n"); + if(StartSample) $fwrite(file, "BEGIN %s\n", memfilename); if(dut.core.ifu.InstrClassM[0] 
& ~dut.core.StallW & ~dut.core.FlushW & dut.core.InstrValidM) begin direction = PCSrcM ? "t" : "n"; $fwrite(file, "%h %s\n", dut.core.PCM, direction); end + if(EndSample) $fwrite(file, "END %s\n", memfilename); end end end