Merge branch 'main' of https://github.com/openhwgroup/cvw into dev

This commit is contained in:
David Harris 2023-03-18 09:24:37 -07:00
commit 70e4c71f41
17 changed files with 220 additions and 73 deletions

View File

@ -261,6 +261,22 @@ $ make all
Note: When the make tasks complete, youll find source code in $RISCV/buildroot/output/build and the executables in $RISCV/buildroot/output/images.
### Generate load images for linux boot
The Questa linux boot uses preloaded bootram and ram memory. We use QEMU to generate these preloaded memory files. Files output in $RISCV/linux-testvectors
cd cvw/linux/testvector-generation
./genInitMem.sh
This may require changing file permissions to the linux-testvectors directory.
### Generate QEMU linux trace
The linux testbench can instruction by instruction compare Wally's committed instructions against QEMU. To do this QEMU outputs a log file consisting of all instructions executed. Interrupts are handled by forcing the testbench to generate an interrupt at the same cycle as in QEMU. Generating this trace will take more than 24 hours.
cd cvw/linux/testvector-generation
./genTrace.sh
### Download Synthesis Libraries
For logic synthesis, we need a synthesis tool (see Section 3.XREF) and a cell library. Clone the OSU 12-track cell library for the Skywater 130 nm process:

57
bin/CModelBranchAccuracy.sh Executable file
View File

@ -0,0 +1,57 @@
#!/bin/bash
###########################################
## Written: ross1728@gmail.com
## Created: 12 March 2023
## Modified:
##
## Purpose: Takes a directory of branch outcomes organized as 1 files per benchmark.
## Computes the geometric mean.
##
## A component of the CORE-V-WALLY configurable RISC-V project.
##
## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
##
## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
##
## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
## except in compliance with the License, or, at your option, the Apache License version 2.0. You
## may obtain a copy of the License at
##
## https:##solderpad.org/licenses/SHL-2.1/
##
## Unless required by applicable law or agreed to in writing, any work distributed under the
## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
## either express or implied. See the License for the specific language governing permissions
## and limitations under the License.
################################################################################################
Directory="$1"
Files="$1/*.log"
for Pred in "bimodal" "gshare"
do
for Size in $(seq 6 2 16)
do
if [ $Pred = "gshare" ]; then
SizeString="$Size $Size 18 1"
elif [ $Pred = "bimodal" ]; then
SizeString="$Size 18 1"
fi
Product=1.0
Count=0
for File in $Files
do
#echo "sim_bp $Pred $Size $Size 18 1 $File | tail -1 | awk '{print $4}'"
#echo "sim_bp $Pred $SizeString $File | tail -1 | awk '{print $4}'"
BMDR=`sim_bp $Pred $SizeString $File | tail -1 | awk '{print $4}'`
Product=`echo "$Product * $BMDR" | bc`
Count=$((Count+1))
done
GeoMean=`perl -E "say $Product**(1/$Count)"`
echo "$Pred$Size $GeoMean"
done
done

52
bin/SeparateBranch.sh Executable file
View File

@ -0,0 +1,52 @@
#!/bin/bash
###########################################
## Written: ross1728@gmail.com
## Created: 12 March 2023
## Modified:
##
## Purpose: Converts a single branch.log containing multiple benchmark branch outcomes into
## separate files, one for each program.x4
##
## A component of the CORE-V-WALLY configurable RISC-V project.
##
## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
##
## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
##
## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
## except in compliance with the License, or, at your option, the Apache License version 2.0. You
## may obtain a copy of the License at
##
## https:##solderpad.org/licenses/SHL-2.1/
##
## Unless required by applicable law or agreed to in writing, any work distributed under the
## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
## either express or implied. See the License for the specific language governing permissions
## and limitations under the License.
################################################################################################
File="$1"
TrainLineNumbers=`cat $File | grep -n "TRAIN" | awk -NF ':' '{print $1}'`
BeginLineNumbers=`cat $File | grep -n "BEGIN" | awk -NF ':' '{print $1}'`
Name=`cat $File | grep -n "BEGIN" | awk -NF '/' '{print $6_$4}'`
EndLineNumbers=`cat $File | grep -n "END" | awk -NF ':' '{print $1}'`
echo $Name
echo $BeginLineNumbers
echo $EndLineNumbers
NameArray=($Name)
TrainLineNumberArray=($TrainLineNumbers)
BeginLineNumberArray=($BeginLineNumbers)
EndLineNumberArray=($EndLineNumbers)
mkdir -p branch
Length=${#EndLineNumberArray[@]}
for i in $(seq 0 1 $((Length-1)))
do
CurrName=${NameArray[$i]}
CurrTrain=$((${TrainLineNumberArray[$i]}+1))
CurrEnd=$((${EndLineNumberArray[$i]}-1))
echo $CurrName, $CurrTrain, $CurrEnd
sed -n "${CurrTrain},${CurrEnd}p" $File > branch/${CurrName}_branch.log
done

View File

@ -30,6 +30,18 @@ import sys
import matplotlib.pyplot as plt
import re
#RefData={'twobitCModel' :(['6', '8', '10', '12', '14', '16'],
# [11.0680836450622, 8.53864970807778, 7.59565430177984, 6.38741598498948, 5.83662961500838, 5.83662961500838]),
# 'gshareCModel' : (['6', '8', '10', '12', '14', '16'],
# [14.5859173702079, 12.3634674403619, 10.5806018170154, 8.38831266973592, 6.37097544620762, 3.52638362703015])
#}
RefData = [('twobitCModel6', 11.0501534891674), ('twobitCModel8', 8.51829052266352), ('twobitCModel10', 7.56775222626483),
('twobitCModel12', 6.31366834586515), ('twobitCModel14', 5.72699936834177), ('twobitCModel16', 5.72699936834177),
('gshareCModel6', 14.5731555979574), ('gshareCModel8', 12.3155658100497), ('gshareCModel10', 10.4589596630561),
('gshareCModel12', 8.25796055444401), ('gshareCModel14', 6.23093702707613), ('gshareCModel16', 3.34001125650374)]
def ComputeCPI(benchmark):
'Computes and inserts CPI into benchmark stats.'
(nameString, opt, dataDict) = benchmark
@ -221,14 +233,15 @@ if(sys.argv[1] == '-b'):
for benchmark in benchmarkAll:
(name, opt, config, dataDict) = benchmark
if name+'_'+opt in benchmarkDict:
benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR']))
benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR']))
else:
benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])]
benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])]
size = len(benchmarkDict)
index = 1
if(summery == 0):
#print('Number of plots', size)
for benchmarkName in benchmarkDict:
currBenchmark = benchmarkDict[benchmarkName]
(names, values) = FormatToPlot(currBenchmark)
@ -241,6 +254,8 @@ if(sys.argv[1] == '-b'):
index += 1
else:
combined = benchmarkDict['All_']
# merge the reference data into rtl data
combined.extend(RefData)
(name, value) = FormatToPlot(combined)
lst = []
dct = {}
@ -264,8 +279,8 @@ if(sys.argv[1] == '-b'):
dct[PredType] = (currSize, currPercent)
print(dct)
fig, axes = plt.subplots()
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x'}
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue'}
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'}
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'}
for cat in dct:
(x, y) = dct[cat]
x=[int(2**int(v)) for v in x]

1
bin/sim_bp Symbolic link
View File

@ -0,0 +1 @@
../addins/branch-predictor-simulator/src/sim_bp

View File

@ -6,6 +6,7 @@
--override cpu/ignore_non_leaf_DAU=1
--override cpu/wfi_is_nop=T
--override cpu/mimpid=0x100
--override cpu/misa_Extensions_mask=0x0
# THIS NEEDS FIXING to 16
--override cpu/PMP_registers=0

View File

@ -126,8 +126,7 @@ for test in ahbTests:
grepstr="All tests ran without failures")
configs.append(tc)
#tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"]
tests64gc = ["arch64i", "arch64c", "arch64m"]
tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"]
if (coverage): # delete all but 64gc tests when running coverage
configs = []
coverStr = '-coverage'

24
src/cache/cache.sv vendored
View File

@ -29,7 +29,7 @@
`include "wally-config.vh"
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
input logic clk,
input logic reset,
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
@ -39,7 +39,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
input logic [1:0] CacheAtomic, // Atomic operation
input logic FlushCache, // Flush all dirty lines back to memory
input logic InvalidateCache, // Clear all valid bits
input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits.
input logic [11:0] NextSet, // Virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] PAdr, // Physical address
input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only)
input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only)
@ -50,7 +50,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
output logic CacheMiss, // Cache miss
output logic CacheAccess, // Cache access
// lsu control
input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr
input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextSet
// Bus fsm interface
input logic CacheBusAck, // Bus operation completed
input logic SelBusBeat, // Word in cache line comes from BeatCount
@ -74,7 +74,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
logic SelAdr;
logic [1:0] AdrSelMuxSel;
logic [SETLEN-1:0] CAdr;
logic [SETLEN-1:0] CacheSet;
logic [LINELEN-1:0] LineWriteData;
logic ClearValid, ClearDirty, SetDirty, SetValid;
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
@ -106,24 +106,24 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
// Read Path
/////////////////////////////////////////////////////////////////////////////////////////////
// Choose read address (CAdr). Normally use NextAdr, but use PAdr during stalls
// Choose read address (CacheSet). Normally use NextSet, but use PAdr during stalls
// and FlushAdr when handling D$ flushes
// The icache must update to the newest PCNextF on flush as it is probably a trap. Trap
// sets PCNextF to XTVEC and the icache must start reading the instruction.
assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))};
mux3 #(SETLEN) AdrSelMux(NextAdr[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
AdrSelMuxSel, CAdr);
assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))};
mux3 #(SETLEN) AdrSelMux(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
AdrSelMuxSel, CacheSet);
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0](
.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
.clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
// Select victim way for associative caches
if(NUMWAYS > 1) begin:vict
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CacheSet, .LRUWriteEn(LRUWriteEn & ~FlushStage),
.SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
end else
assign VictimWay = 1'b1; // one hot.
@ -138,7 +138,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag));
// Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines
if(DCACHE)
if(!READ_ONLY_CACHE)
mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
.d1(BeatCount), .s(SelBusBeat),
.y(WordOffsetAddr));

View File

@ -37,7 +37,7 @@ module cacheLRU
input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant
input logic [NUMWAYS-1:0] HitWay, // Which way is valid and matches PAdr's tag
input logic [NUMWAYS-1:0] ValidWay, // Which ways for a particular set are valid, ignores tag
input logic [SETLEN-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [SETLEN-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [SETLEN-1:0] PAdr, // Physical address
input logic LRUWriteEn, // Update the LRU state
input logic SetValid, // Set the dirty bit in the selected way and set
@ -124,8 +124,7 @@ module cacheLRU
// LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice.
// This is a two port memory.
// Every cycle must read from CAdr and each load/store must write the new LRU.
// this is still wrong.***************************
// Every cycle must read from CacheSet and each load/store must write the new LRU.
always_ff @(posedge clk) begin
if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
if(CacheEn) begin
@ -133,10 +132,10 @@ module cacheLRU
else if (LRUWriteEn & ~FlushStage) begin
LRUMemory[PAdr] <= NextLRU;
end
if(LRUWriteEn & ~FlushStage & (PAdr == CAdr))
if(LRUWriteEn & ~FlushStage & (PAdr == CacheSet))
CurrLRU <= #1 NextLRU;
else
CurrLRU <= #1 LRUMemory[CAdr];
CurrLRU <= #1 LRUMemory[CacheSet];
end
end

18
src/cache/cacheway.sv vendored
View File

@ -30,12 +30,12 @@
`include "wally-config.vh"
module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
OFFSETLEN = 5, INDEXLEN = 9, DIRTY_BITS = 1) (
OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) (
input logic clk,
input logic reset,
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant
input logic [$clog2(NUMLINES)-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [`PA_BITS-1:0] PAdr, // Physical address
input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only)
input logic SetValid, // Set the dirty bit in the selected way and set
@ -114,7 +114,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
/////////////////////////////////////////////////////////////////////////////////////////////
ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
.addr(CAdr), .dout(ReadTag), .bwe('1),
.addr(CacheSet), .dout(ReadTag), .bwe('1),
.din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
@ -136,7 +136,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
localparam LOGNUMSRAM = $clog2(NUMSRAM);
for(words = 0; words < NUMSRAM; words++) begin: word
ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CAdr),
ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet),
.dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
@ -152,9 +152,9 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
always_ff @(posedge clk) begin // Valid bit array,
if (reset) ValidBits <= #1 '0;
if(CacheEn) begin
ValidWay <= #1 ValidBits[CAdr];
ValidWay <= #1 ValidBits[CacheSet];
if(InvalidateCache) ValidBits <= #1 '0;
else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay;
else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CacheSet] <= #1 SetValidWay;
end
end
@ -163,13 +163,13 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
/////////////////////////////////////////////////////////////////////////////////////////////
// Dirty bits
if (DIRTY_BITS) begin:dirty
if (!READ_ONLY_CACHE) begin:dirty
always_ff @(posedge clk) begin
// reset is optional. Consider merging with TAG array in the future.
//if (reset) DirtyBits <= #1 {NUMLINES{1'b0}};
if(CacheEn) begin
Dirty <= #1 DirtyBits[CAdr];
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CAdr] <= #1 SetDirtyWay;
Dirty <= #1 DirtyBits[CacheSet];
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CacheSet] <= #1 SetDirtyWay;
end
end
end else assign Dirty = 1'b0;

View File

@ -43,7 +43,7 @@ module hazard (
);
logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause;
logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW;
logic LatestUnstalledD, LatestUnstalledE, LatestUnstalledM, LatestUnstalledW;
logic FlushDCause, FlushECause, FlushMCause, FlushWCause;
// stalls and flushes
@ -95,14 +95,14 @@ module hazard (
assign #1 StallW = StallWCause;
// detect the first stage that is not stalled
assign FirstUnstalledD = ~StallD & StallF;
assign FirstUnstalledE = ~StallE & StallD;
assign FirstUnstalledM = ~StallM & StallE;
assign FirstUnstalledW = ~StallW & StallM;
assign LatestUnstalledD = ~StallD & StallF;
assign LatestUnstalledE = ~StallE & StallD;
assign LatestUnstalledM = ~StallM & StallE;
assign LatestUnstalledW = ~StallW & StallM;
// Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush
assign #1 FlushD = FirstUnstalledD | FlushDCause;
assign #1 FlushE = FirstUnstalledE | FlushECause;
assign #1 FlushM = FirstUnstalledM | FlushMCause;
assign #1 FlushW = FirstUnstalledW | FlushWCause;
assign #1 FlushD = LatestUnstalledD | FlushDCause;
assign #1 FlushE = LatestUnstalledE | FlushECause;
assign #1 FlushM = LatestUnstalledM | FlushMCause;
assign #1 FlushW = LatestUnstalledW | FlushWCause;
endmodule

View File

@ -71,7 +71,7 @@ module bpred (
logic [1:0] BPDirPredF;
logic [`XLEN-1:0] BTAF, RASPCF;
logic [`XLEN-1:0] BPBTAF, RASPCF;
logic BPPCWrongE;
logic IClassWrongE;
logic BPDirPredWrongE;
@ -85,7 +85,7 @@ module bpred (
logic BTBTargetWrongE;
logic RASTargetWrongE;
logic [`XLEN-1:0] BTAD;
logic [`XLEN-1:0] BPBTAD;
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
@ -95,7 +95,7 @@ module bpred (
logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW;
logic BPReturnWrongD;
logic [`XLEN-1:0] BTAE;
logic [`XLEN-1:0] BPBTAE;
@ -150,7 +150,7 @@ module bpred (
btb #(`BTB_SIZE)
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM,
.BTAF, .BTAD, .BTAE,
.BPBTAF, .BPBTAD, .BPBTAE,
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
.IClassWrongM, .IClassWrongE,
.IEUAdrE, .IEUAdrM,
@ -181,7 +181,7 @@ module bpred (
// Output the predicted PC or corrected PC on miss-predict.
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF);
mux2 #(`XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
// Selects the BP or PC+2/4.
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
// If the prediction is wrong select the correct address.
@ -196,7 +196,7 @@ module bpred (
if(`ZICOUNTERS_SUPPORTED) begin
logic [`XLEN-1:0] RASPCD, RASPCE;
logic BTBPredPCWrongE, RASPredPCWrongE;
logic BTAWrongE, RASPredPCWrongE;
// performance counters
// 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now
// 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal)
@ -207,14 +207,14 @@ module bpred (
// could be wrong or the fall through address selected for branch predict not taken.
// By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of
// both without the above inaccuracies.
// **** use BTAWrongM from BTB.
assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
// **** use BPBTAWrongM from BTB.
assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
{BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE},
{BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
{BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});
end else begin

View File

@ -35,9 +35,9 @@ module btb #(parameter Depth = 10 ) (
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
output logic [`XLEN-1:0] BTAD,
output logic [`XLEN-1:0] BTAE,
output logic [`XLEN-1:0] BPBTAF, // BTB's guess at PC
output logic [`XLEN-1:0] BPBTAD,
output logic [`XLEN-1:0] BPBTAE,
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
// update
input logic IClassWrongM, // BTB's instruction class guess was wrong
@ -57,8 +57,8 @@ module btb #(parameter Depth = 10 ) (
logic [`XLEN+3:0] TableBTBPredF;
logic [`XLEN-1:0] IEUAdrW;
logic [`XLEN-1:0] PCW;
logic BTBWrongE, BTAWrongE;
logic BTBWrongM, BTAWrongM;
logic BTBWrongE, BPBTAWrongE;
logic BTBWrongM, BPBTAWrongM;
// hashing function for indexing the PC
@ -84,12 +84,12 @@ module btb #(parameter Depth = 10 ) (
assign MatchW = PCFIndex == PCWIndex;
assign MatchX = MatchD | MatchE | MatchM | MatchW;
assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} :
assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BPBTAD} :
MatchE ? {InstrClassE, IEUAdrE} :
MatchM ? {InstrClassM, IEUAdrM} :
{InstrClassW, IEUAdrW} ;
assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF};
assign {BTBIClassF, BPBTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF};
// An optimization may be using a PC relative address.
@ -97,16 +97,16 @@ module btb #(parameter Depth = 10 ) (
.clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF),
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1));
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD);
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);
// BTAE is not strickly necessary. However it is used by two parts of wally.
// BPBTAE is not strickly necessary. However it is used by two parts of wally.
// 1. It gates updates to the BTB when the prediction does not change. This save power.
// 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong.
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE);
assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);
// 2. BPBTAWrongE is used by the performance counters to track when the BTB's BPBTA or instruction class is wrong.
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
assign BPBTAWrongE = (BPBTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);
flopenrc #(1) BTAWrongMReg(clk, reset, FlushM, ~StallM, BTAWrongE, BTAWrongM);
assign BTBWrongM = BTAWrongM | IClassWrongM;
flopenrc #(1) BPBTAWrongMReg(clk, reset, FlushM, ~StallM, BPBTAWrongE, BPBTAWrongM);
assign BTBWrongM = BPBTAWrongM | IClassWrongM;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);

View File

@ -233,7 +233,7 @@ module ifu (
assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0;
cache #(.LINELEN(`ICACHE_LINELENINBITS),
.NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS),
.NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0))
.NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1))
icache(.clk, .reset, .FlushStage(FlushD), .Stall(GatedStallD),
.FetchBuffer, .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
@ -245,7 +245,7 @@ module ifu (
.CacheWriteData('0),
.CacheRW(CacheRWF),
.CacheAtomic('0), .FlushCache('0),
.NextAdr(PCSpillNextF[11:0]),
.NextSet(PCSpillNextF[11:0]),
.PAdr(PCPF),
.CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM));
ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW)

View File

@ -264,9 +264,9 @@ module lsu (
assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW);
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .DCACHE(1)) dcache(
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .READ_ONLY_CACHE(0)) dcache(
.clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM),
.FlushCache(FlushDCache), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM),
.FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM),
.ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]),
.CacheWriteData(LSUWriteDataM), .SelHPTW,
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),

View File

@ -69,6 +69,7 @@ logic [3:0] dummy;
logic DCacheFlushDone, DCacheFlushStart;
logic riscofTest;
logic StartSample, EndSample;
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
@ -405,8 +406,7 @@ logic [3:0] dummy;
integer HPMCindex;
logic StartSampleFirst;
logic StartSampleDelayed;
logic StartSample;
logic EndSample, EndSampleFirst, EndSampleDelayed;
logic EndSampleFirst, EndSampleDelayed;
logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0];
string HPMCnames[] = '{"Mcycle",
@ -544,15 +544,23 @@ logic [3:0] dummy;
string direction;
int file;
logic PCSrcM;
string LogFile;
logic resetD, resetEdge;
flopenrc #(1) PCSrcMReg(clk, reset, dut.core.FlushM, ~dut.core.StallM, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PCSrcE, PCSrcM);
flop #(1) ResetDReg(clk, reset, resetD);
assign resetEdge = ~reset & resetD;
initial begin
file = $fopen("branch.log", "w");
LogFile = $psprintf("branch_%s%0d.log", `BPRED_TYPE, `BPRED_SIZE);
file = $fopen(LogFile, "w");
end
always @(posedge clk) begin
if(resetEdge) $fwrite(file, "TRAIN\n");
if(StartSample) $fwrite(file, "BEGIN %s\n", memfilename);
if(dut.core.ifu.InstrClassM[0] & ~dut.core.StallW & ~dut.core.FlushW & dut.core.InstrValidM) begin
direction = PCSrcM ? "t" : "n";
$fwrite(file, "%h %s\n", dut.core.PCM, direction);
end
if(EndSample) $fwrite(file, "END %s\n", memfilename);
end
end
end

View File

@ -178,8 +178,7 @@ module testbench;
void'(rvviRefMemorySetVolatile(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE)));
end
if (`UART_SUPPORTED) begin
//void'(rvviRefMemorySetVolatile(`UART_BASE, (`UART_BASE + `UART_RANGE)));
void'(rvviRefMemorySetVolatile(`UART_BASE, (`UART_BASE + 7))); // BUG
void'(rvviRefMemorySetVolatile(`UART_BASE, (`UART_BASE + `UART_RANGE)));
end
if (`PLIC_SUPPORTED) begin
void'(rvviRefMemorySetVolatile(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE)));