diff --git a/.gitignore b/.gitignore index f0afc3a98..1664b939f 100644 --- a/.gitignore +++ b/.gitignore @@ -180,3 +180,5 @@ sim/branch*.log benchmarks/embench/wally*.json benchmarks/embench/run* sim/cfi.log +sim/cfi/* +sim/branch/* diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index d7a18b7e2..d8fbddae9 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -43,6 +43,7 @@ sim: modelsim_build_memfile modelsim_run speed # launches modelsim to simulate tests on wally modelsim_run: + mkdir -p ../../sim/wkdir (cd ../../sim/ && vsim -c -do "do wally-batch.do rv32gc embench") cd ../../benchmarks/embench/ @@ -82,4 +83,4 @@ clean: allclean: clean rm -rf $(embench_dir)/logs/ -# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c \ No newline at end of file +# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c diff --git a/bin/CModelBTBAccuracy.sh b/bin/CModelBTBAccuracy.sh index e4a26fb85..479340eb2 100755 --- a/bin/CModelBTBAccuracy.sh +++ b/bin/CModelBTBAccuracy.sh @@ -40,18 +40,18 @@ do lines=`sim_bp gshare 16 16 $Size 1 $File | tail -5` Total=`echo "$lines" | head -1 | awk '{print $5}'` Miss=`echo "$lines" | tail -2 | head -1 | awk '{print $8}'` - BMDR=`echo "$Miss / $Total" | bc -l` + BMDR=`echo "100.0 * $Miss / $Total" | bc -l` BMDRArray+=("$BMDR") if [ $Miss -eq 0 ]; then - Product=`echo "scale=200; $Product / $Total" | bc -l` + Product=`echo "scale=200; $Product * 100 / $Total" | bc -l` else - Product=`echo "scale=200; $Product * $Miss / $Total" | bc -l` + Product=`echo "scale=200; $Product * $BMDR" | bc -l` fi Count=$((Count+1)) done # with such long precision bc outputs onto multiple lines # must remove \n and \ from string Product=`echo "$Product" | tr -d '\n' | tr -d '\\\'` - GeoMean=`perl -E "say $Product**(1/$Count) * 100"` + GeoMean=`perl -E "say $Product**(1/$Count)"` echo "$Pred$Size $GeoMean" done diff --git a/bin/CModelBranchAccuracy.sh b/bin/CModelBranchAccuracy.sh index 1b94f7c9a..8253891bb 100755 --- a/bin/CModelBranchAccuracy.sh +++ b/bin/CModelBranchAccuracy.sh @@ -46,7 +46,7 @@ do do #echo "sim_bp $Pred $Size $Size 18 1 $File | tail -1 | awk '{print $4}'" #echo "sim_bp $Pred $SizeString $File | tail -1 | awk '{print $4}'" - BMDR=`sim_bp $Pred $SizeString $File | tail -1 | awk '{print $4}'` + BMDR=`sim_bp -c $Pred $SizeString $File | tail -1 | awk '{print $4}'` Product=`echo "$Product * $BMDR" | bc` Count=$((Count+1)) done diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index a11296b3e..7e8eb7cde 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -32,12 +32,13 @@ import math import numpy as np import argparse -RefDataBP = [('twobitCModel6', 'twobitCModel', 64, 9.65280765420711), ('twobitCModel8', 'twobitCModel', 256, 8.75120245829945), ('twobitCModel10', 'twobitCModel', 1024, 8.1318382397263), - ('twobitCModel12', 'twobitCModel', 4096, 7.53026646633342), ('twobitCModel14', 'twobitCModel', 16384, 6.07679338544009), ('twobitCModel16', 'twobitCModel', 65536, 6.07679338544009), - ('gshareCModel6', 'gshareCModel', 64, 10.6602835418646), ('gshareCModel8', 'gshareCModel', 256, 8.38384710559667), ('gshareCModel10', 'gshareCModel', 1024, 6.36847432155534), - ('gshareCModel12', 'gshareCModel', 4096, 3.91108491151983), ('gshareCModel14', 'gshareCModel', 16384, 2.83926519215395), ('gshareCModel16', 'gshareCModel', 65536, .60213659066941)] -RefDataBTB = [('BTBCModel6', 'BTBCModel', 64, 1.11806778745097), ('BTBCModel8', 'BTBCModel', 256, 0.183833943219956), ('BTBCModel10', 'BTBCModel', 1024, 0.0109271020749376), - ('BTBCModel12', 'BTBCModel', 4096, 0.00437600802791213), ('BTBCModel14', 'BTBCModel', 16384, 0.00188756234204305), ('BTBCModel16', 'BTBCModel', 65536, 0.00188756234204305)] + +RefDataBP = [('twobitCModel6', 'twobitCModel', 64, 10.0060297551637), ('twobitCModel8', 'twobitCModel', 256, 8.4320392215602), ('twobitCModel10', 'twobitCModel', 1024, 7.29493318805151), + ('twobitCModel12', 'twobitCModel', 4096, 6.84739616147794), ('twobitCModel14', 'twobitCModel', 16384, 5.68432926870082), ('twobitCModel16', 'twobitCModel', 65536, 5.68432926870082), + ('gshareCModel6', 'gshareCModel', 64, 11.4737703417701), ('gshareCModel8', 'gshareCModel', 256, 8.52341470761974), ('gshareCModel10', 'gshareCModel', 1024, 6.32975690693015), + ('gshareCModel12', 'gshareCModel', 4096, 4.55424632377659), ('gshareCModel14', 'gshareCModel', 16384, 3.54251547725509), ('gshareCModel16', 'gshareCModel', 65536, 1.90424999467293)] +RefDataBTB = [('BTBCModel6', 'BTBCModel', 64, 1.51480272475844), ('BTBCModel8', 'BTBCModel', 256, 0.209057900418965), ('BTBCModel10', 'BTBCModel', 1024, 0.0117345454469572), + ('BTBCModel12', 'BTBCModel', 4096, 0.00125540990359826), ('BTBCModel14', 'BTBCModel', 16384, 0.000732471628510962), ('BTBCModel16', 'BTBCModel', 65536, 0.000732471628510962)] def ParseBranchListFile(path): '''Take the path to the list of Questa Sim log files containing the performance counters outputs. File @@ -254,9 +255,9 @@ def BarGraph(seriesDict, xlabelList, BenchPerRow, FileName, IncludeLegend): fig = plt.subplots(figsize = (EffectiveNumInGroup*BenchPerRow/8, 4)) colors = ['blue', 'blue', 'blue', 'blue', 'blue', 'blue', 'black', 'black', 'black', 'black', 'black', 'black'] for name in seriesDict: - xpos = np.arange(BenchPerRow) - xpos = [x + index*barWidth for x in xpos] values = seriesDict[name] + xpos = np.arange(len(values)) + xpos = [x + index*barWidth for x in xpos] plt.bar(xpos, Inversion(values), width=barWidth, edgecolor='grey', label=name, color=colors[index%len(colors)]) index += 1 plt.xticks([r + barWidth*(NumberInGroup/2-0.5) for r in range(0, BenchPerRow)], xlabelList) @@ -275,7 +276,7 @@ def SelectPartition(xlabelListBig, seriesDictBig, group, BenchPerRow): return(xlabelListTrunk, seriesDictTrunk) -def ReportAsGraph(benchmarkDict, bar): +def ReportAsGraph(benchmarkDict, bar, FileName): def FormatToPlot(currBenchmark): names = [] sizes = [] @@ -329,8 +330,8 @@ def ReportAsGraph(benchmarkDict, bar): axes.set_xticks(xdata) axes.set_xticklabels(xdata) axes.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5) - plt.show() - + if(FileName == None): plt.show() + else: plt.savefig(FileName) # if(not args.summary): # size = len(benchmarkDict) @@ -389,7 +390,7 @@ def ReportAsGraph(benchmarkDict, bar): # on each piece. for row in range(0, math.ceil(NumBenchmarks / BenchPerRow)): (xlabelListTrunk, seriesDictTrunk) = SelectPartition(xlabelListBig, seriesDictBig, row, BenchPerRow) - FileName = 'barSegment%d.png' % row + FileName = 'barSegment%d.svg' % row groupLen = len(xlabelListTrunk) BarGraph(seriesDictTrunk, xlabelListTrunk, groupLen, FileName, (row == 0)) @@ -414,7 +415,8 @@ displayMode.add_argument('--text', action='store_const', help='Display in text f displayMode.add_argument('--table', action='store_const', help='Display in text format only.', default=False, const=True) displayMode.add_argument('--gui', action='store_const', help='Display in text format only.', default=False, const=True) displayMode.add_argument('--debug', action='store_const', help='Display in text format only.', default=False, const=True) -parser.add_argument('sources', nargs=1) +parser.add_argument('sources', nargs=1, help='File lists the input Questa transcripts to process.') +parser.add_argument('FileName', metavar='FileName', type=str, nargs='?', help='output graph to file .png If not included outputs to screen.', default=None) args = parser.parse_args() @@ -454,7 +456,7 @@ if(ReportMode == 'text'): ReportAsText(benchmarkDict) if(ReportMode == 'gui'): - ReportAsGraph(benchmarkDict, args.bar) + ReportAsGraph(benchmarkDict, args.bar, args.FileName) # *** this is only needed of -b (no -s) diff --git a/fpga/generator/Makefile b/fpga/generator/Makefile index 9c7f557f3..4f60d1ebe 100644 --- a/fpga/generator/Makefile +++ b/fpga/generator/Makefile @@ -6,17 +6,17 @@ dst := IP #export board := vcu118 # vcu108 -#export XILINX_PART := xcvu095-ffva2104-2-e -#export XILINX_BOARD := xilinx.com:vcu108:part0:1.2 -#export board := vcu108 +export XILINX_PART := xcvu095-ffva2104-2-e +export XILINX_BOARD := xilinx.com:vcu108:part0:1.2 +export board := vcu108 # Arty A7 -export XILINX_PART := xc7a100tcsg324-1 -export XILINX_BOARD := digilentinc.com:arty-a7-100:part0:1.1 -export board := ArtyA7 +# export XILINX_PART := xc7a100tcsg324-1 +# export XILINX_BOARD := digilentinc.com:arty-a7-100:part0:1.1 +# export board := ArtyA7 # for Arty A7 and S7 boards -all: FPGA_Arty +all: FPGA_VCU # VCU 108 and VCU 118 boards #all: FPGA_VCU @@ -70,6 +70,9 @@ PreProcessFiles: sed -i "s/PLIC_NUM_SRC = .*/PLIC_NUM_SRC = 32'd53;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/PLIC_SDC_ID.*/PLIC_SDC_ID = 32'd20;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/BPRED_SIZE.*/BPRED_SIZE = 32'd12;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/$\$readmemh.*/$\$readmemh(\"..\/..\/..\/fpga\/src\/boot.mem\", ROM, 0);/g" ../src/CopiedFiles_do_not_add_to_repo/generic/mem/rom1p1r.sv + # This line allows the Bootloader to be loaded in a Block RAM on the FPGA + sed -i "s/logic \[DATA_WIDTH-1:0\].*ROM.*/(\* rom_style=\"block\" \*) &/g" ../src/CopiedFiles_do_not_add_to_repo/generic/mem/rom1p1r.sv $(dst)/%.log: %.tcl mkdir -p IP diff --git a/linux/Makefile b/linux/Makefile index cc19c7f2a..9ef677d55 100644 --- a/linux/Makefile +++ b/linux/Makefile @@ -33,10 +33,8 @@ OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump) all: download Image disassemble install Image: - make -C $(BUILDROOT) --jobs + bash -c "unset LD_LIBRARY_PATH; make -C $(BUILDROOT) --jobs;" $(MAKE) generate - # TODO: Need to find a way to set the PATH for child processes. - # source ../setup.sh; $(MAKE) disassemble install: sudo rm -rf $(RISCV)/$(BUILDROOT) diff --git a/sim/bp-results/branch-list.txt b/sim/bp-results/branch-list.txt index c241610d3..956fc9847 100644 --- a/sim/bp-results/branch-list.txt +++ b/sim/bp-results/branch-list.txt @@ -1,12 +1,12 @@ -gshare6.log gshare 6 -gshare8.log gshare 8 -gshare10.log gshare 10 -gshare12.log gshare 12 -gshare14.log gshare 14 -gshare16.log gshare 16 -twobit6.log twobit 6 -twobit8.log twobit 8 -twobit10.log twobit 10 -twobit12.log twobit 12 -twobit14.log twobit 14 -twobit16.log twobit 16 +../logs/rv32gc_gshare6.log gshare 6 +../logs/rv32gc_gshare8.log gshare 8 +../logs/rv32gc_gshare10.log gshare 10 +../logs/rv32gc_gshare12.log gshare 12 +../logs/rv32gc_gshare14.log gshare 14 +../logs/rv32gc_gshare16.log gshare 16 +../logs/rv32gc_twobit6.log twobit 6 +../logs/rv32gc_twobit8.log twobit 8 +../logs/rv32gc_twobit10.log twobit 10 +../logs/rv32gc_twobit12.log twobit 12 +../logs/rv32gc_twobit14.log twobit 14 +../logs/rv32gc_twobit16.log twobit 16 diff --git a/sim/bp-results/btb-list.txt b/sim/bp-results/btb-list.txt index 741efdf24..30811459e 100644 --- a/sim/bp-results/btb-list.txt +++ b/sim/bp-results/btb-list.txt @@ -1,6 +1,6 @@ -btb6.log btb 6 -btb8.log btb 8 -btb10.log btb 10 -btb12.log btb 12 -btb14.log btb 14 -btb16.log btb 16 +../logs/rv32gc_BTB6.log btb 6 +../logs/rv32gc_BTB8.log btb 8 +../logs/rv32gc_BTB10.log btb 10 +../logs/rv32gc_BTB12.log btb 12 +../logs/rv32gc_BTB14.log btb 14 +../logs/rv32gc_BTB16.log btb 16 diff --git a/sim/bp-results/class-list.txt b/sim/bp-results/class-list.txt index 0d24aa6ee..3926af969 100644 --- a/sim/bp-results/class-list.txt +++ b/sim/bp-results/class-list.txt @@ -1,6 +1,6 @@ -class6.log class 6 -class8.log class 8 -class10.log class 10 -class12.log class 12 -class14.log class 14 -class16.log class 16 +../logs/rv32gc_class6.log class 6 +../logs/rv32gc_class8.log class 8 +../logs/rv32gc_class10.log class 10 +../logs/rv32gc_class12.log class 12 +../logs/rv32gc_class14.log class 14 +../logs/rv32gc_class16.log class 16 diff --git a/sim/bp-results/ras-list.txt b/sim/bp-results/ras-list.txt index b3e273a3d..c7628ffaa 100644 --- a/sim/bp-results/ras-list.txt +++ b/sim/bp-results/ras-list.txt @@ -1,5 +1,5 @@ -ras3.log ras 3 -ras4.log ras 4 -ras6.log ras 6 -ras10.log ras 10 -ras16.log ras 16 +../logs/rv32gc_RAS3.log ras 3 +../logs/rv32gc_RAS4.log ras 4 +../logs/rv32gc_RAS6.log ras 6 +../logs/rv32gc_RAS10.log ras 10 +../logs/rv32gc_RAS16.log ras 16 diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py index 60af41298..c04b9bd51 100755 --- a/sim/bpred-sim.py +++ b/sim/bpred-sim.py @@ -132,7 +132,7 @@ def main(): # BTB and class size sweep bpdSize = [6, 8, 10, 12, 14, 16] for CurrBPSize in bpdSize: - name = 'BTB'+str(CurrBPSize) + name = 'class'+str(CurrBPSize) configOptions = "+define+INSTR_CLASS_PRED=1 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+RAS_SIZE=16+define+BTB_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE" tc = TestCase( name=name, diff --git a/sim/wally-batch.do b/sim/wally-batch.do index df34aa0b3..b117e1691 100644 --- a/sim/wally-batch.do +++ b/sim/wally-batch.do @@ -63,10 +63,10 @@ if {$2 eq "buildroot"} { # start and run simulation if { $coverage } { echo "wally-batch buildroot coverage" - vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G -o testbenchopt +cover=sbecf + vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -o testbenchopt +cover=sbecf vsim -lib wkdir/work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3691,13286 -fatal 7 -cover } else { - vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G -o testbenchopt + vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -o testbenchopt vsim -lib wkdir/work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3691,13286 -fatal 7 } @@ -76,7 +76,7 @@ if {$2 eq "buildroot"} { } elseif {$2 eq "buildroot-no-trace"} { vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 # start and run simulation - vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G -G NO_SPOOFING=1 -o testbenchopt + vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G NO_SPOOFING=1 -o testbenchopt vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829,13286 -fatal 7 #-- Run the Simulation diff --git a/sim/wally-linux-imperas.do b/sim/wally-linux-imperas.do index 196c780be..e9bad30d5 100644 --- a/sim/wally-linux-imperas.do +++ b/sim/wally-linux-imperas.do @@ -88,10 +88,10 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { #run 100 ns #force -deposit testbench/dut/core/priv/priv/csr/csri/IE_REGW 16'h2aa #force -deposit testbench/dut/uncore/uncore/clint/clint/MTIMECMP 64'h1000 - run 7000 ms + run 9800 ms add log -recursive /testbench/dut/* do wave.do - run 14000 ms + run 200 ms #run -all exec ./slack-notifier/slack-notifier.py diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 4701fc4c7..12a81028f 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -79,8 +79,8 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] HitWay, ValidWay; logic CacheHit; - logic [NUMWAYS-1:0] VictimWay, DirtyWay; - logic LineDirty; + logic [NUMWAYS-1:0] VictimWay, DirtyWay, HitDirtyWay; + logic LineDirty, HitLineDirty; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; logic [TAGLEN-1:0] Tag; logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1; @@ -98,8 +98,6 @@ module cache import cvw::*; #(parameter cvw_t P, logic SelWay; logic [LINELEN/8-1:0] LineByteMask; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; - logic ZeroCacheLine; - logic [LINELEN-1:0] PreLineWriteData; genvar index; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -118,7 +116,7 @@ module cache import cvw::*; #(parameter cvw_t P, cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, .SelWay, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .VictimWay, - .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); + .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .HitDirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches if(NUMWAYS > 1) begin:vict @@ -130,6 +128,7 @@ module cache import cvw::*; #(parameter cvw_t P, assign CacheHit = |HitWay; assign LineDirty = |DirtyWay; + assign HitLineDirty = |HitDirtyWay; // ReadDataLineWay is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. @@ -161,11 +160,6 @@ module cache import cvw::*; #(parameter cvw_t P, ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path ///////////////////////////////////////////////////////////////////////////////////////////// - if(P.ZICBOZ_SUPPORTED) begin : cboz_supported - mux2 #(LINELEN) WriteDataMux(FetchBuffer, '0, ZeroCacheLine, PreLineWriteData); - end else begin - assign PreLineWriteData = FetchBuffer; - end if(!READ_ONLY_CACHE) begin:WriteSelLogic logic [LINELEN/8-1:0] DemuxedByteMask, FetchBufferByteSel; @@ -185,14 +179,14 @@ module cache import cvw::*; #(parameter cvw_t P, // Merge write data into fetched cache line for store miss for(index = 0; index < LINELEN/8; index++) begin mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), - .d1(PreLineWriteData[8*index+7:8*index]), .s(FetchBufferByteSel[index] | ZeroCacheLine), .y(LineWriteData[8*index+7:8*index])); + .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index] & ~CMOp[3]), .y(LineWriteData[8*index+7:8*index])); end assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; end else begin:WriteSelLogic // No need for this mux if the cache does not handle writes. - assign LineWriteData = PreLineWriteData; + assign LineWriteData = FetchBuffer; assign LineByteMask = '1; end @@ -225,9 +219,9 @@ module cache import cvw::*; #(parameter cvw_t P, cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .Stall, - .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, + .CacheHit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .SelWay, - .ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .SelWriteback, .SelFlush, + .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CMOp, .CacheEn, .LRUWriteEn); diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 8a11dd735..7c48e65dc 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -51,6 +51,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // cache internals input logic CacheHit, // Exactly 1 way hits input logic LineDirty, // The selected line and way is dirty + input logic HitLineDirty, // The cache hit way is dirty input logic FlushAdrFlag, // On last set of a cache flush input logic FlushWayFlag, // On the last way for any set of a cache flush output logic SelAdr, // [0] SRAM reads from NextAdr, [1] SRAM reads from PAdr @@ -58,7 +59,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic ClearValid, // Clear the valid bit in the selected way and set output logic SetDirty, // Set the dirty bit in the selected way and set output logic ClearDirty, // Clear the dirty bit in the selected way and set - output logic ZeroCacheLine, // Write zeros to all bytes of cacheline output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic LRUWriteEn, // Update the LRU state output logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr @@ -74,10 +74,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic AnyUpdateHit, AnyHit; logic AnyMiss; logic FlushFlag; - logic CMOWritebackHit; logic CMOWriteback; logic CMOZeroNoEviction; - logic CMOZeroEviction; typedef enum logic [3:0]{STATE_READY, // hit states // miss states @@ -95,10 +93,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit - assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now - assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line - assign CMOWriteback = CMOWritebackHit | CMOZeroEviction; + assign CMOWriteback = ((CMOp[1] | CMOp[2]) & CacheHit & HitLineDirty) | CMOp[3] & LineDirty; assign FlushFlag = FlushAdrFlag & FlushWayFlag; @@ -130,9 +126,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; else NextState = STATE_READY; // exclusion-tag-start: icache case - STATE_WRITEBACK: if (CacheBusAck & (CMOp[1] | CMOp[2])) NextState = STATE_READ_HOLD; - else if(CacheBusAck & ~CMOp[3]) NextState = STATE_FETCH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; + STATE_WRITEBACK: if(CacheBusAck & ~(|CMOp[3:1])) NextState = STATE_FETCH; + else if(CacheBusAck) NextState = STATE_READ_HOLD; // Read_hold lowers CacheStall else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; @@ -156,27 +151,25 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache assign SetValid = CurrState == STATE_WRITE_LINE | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_READY & CMOZeroNoEviction) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); - assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0] & CacheHit) | - (CurrState == STATE_WRITEBACK & CMOp[2] & CacheBusAck)); + (CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); + assign ClearValid = (CurrState == STATE_READY & CMOp[0]) | + (CurrState == STATE_WRITEBACK & CMOp[2] & CacheBusAck); // coverage off -item e 1 -fecexprrow 8 assign LRUWriteEn = (((CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) | (CurrState == STATE_WRITE_LINE)) & ~FlushStage) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck); + (CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck); // exclusion-tag-start: icache flushdirtycontrols assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); + (CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. // Flush and eviction controls - (P.ZICBOM_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck); - assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOp[1] | CMOp[2])) | (P.ZICBOZ_SUPPORTED & CacheBusAck & CMOp[3]))) | - (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (P.ZICBOZ_SUPPORTED & CMOZeroNoEviction & ~CacheHit))) | + CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck; + assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOp[1] | CMOp[2])) | (CacheBusAck & CMOp[3]))) | + (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | (CurrState == STATE_WRITE_LINE); - assign ZeroCacheLine = P.ZICBOZ_SUPPORTED & ((CurrState == STATE_READY & CMOZeroNoEviction) | - (CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck))); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2] | ~CacheBusAck)) | (CurrState == STATE_READY & AnyMiss & LineDirty); assign SelFlush = (CurrState == STATE_READY & FlushCache) | @@ -198,7 +191,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | - (P.ZICBOM_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & ~CacheBusAck); + (CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & ~CacheBusAck); assign SelAdr = (CurrState == STATE_READY & (CacheRW[0] | AnyMiss | (|CMOp))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed (CurrState == STATE_FETCH) | diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 9fb836e93..82956fc29 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -51,7 +51,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, output logic [LINELEN-1:0] ReadDataLineWay,// This way's read data if valid output logic HitWay, // This way hits output logic ValidWay, // This way is valid - output logic DirtyWay, // This way is dirty + output logic HitDirtyWay, // The hit way is dirty + output logic DirtyWay , // The selected way is dirty output logic [TAGLEN-1:0] TagWay); // This way's tag if valid localparam WORDSPERLINE = LINELEN/XLEN; @@ -65,10 +66,10 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLine; logic [TAGLEN-1:0] ReadTag; logic Dirty; - logic SelTag; + logic SelDirty; logic SelectedWriteWordEn; logic [LINELEN/8-1:0] FinalByteMask; - logic SetValidEN; + logic SetValidEN, ClearValidEN; logic SetValidWay; logic ClearValidWay; logic SetDirtyWay; @@ -78,7 +79,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, if (!READ_ONLY_CACHE) begin:flushlogic logic FlushWayEn; - mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag); + mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelDirty); // FlushWay is part of a one hot way selection. Must clear it if FlushWay not selected. // coverage off -item e 1 -fecexprrow 3 @@ -86,11 +87,11 @@ module cacheway import cvw::*; #(parameter cvw_t P, assign FlushWayEn = FlushWay & SelFlush; assign SelNonHit = FlushWayEn | SelWay; end else begin:flushlogic // no flush operation for read-only caches. - assign SelTag = VictimWay; + assign SelDirty = VictimWay; assign SelNonHit = SelWay; end - mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData); + mux2 #(1) selectedwaymux(HitWay, SelDirty, SelNonHit , SelData); ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux @@ -102,6 +103,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, assign ClearDirtyWay = ClearDirty & SelData; assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage; // exclusion-tag: icache SelectedWiteWordEn assign SetValidEN = SetValidWay & ~FlushStage; // exclusion-tag: cache SetValidEN + assign ClearValidEN = ClearValidWay & ~FlushStage; // exclusion-tag: cache SetValidEN // If writing the whole line set all write enables to 1, else only set the correct word. assign FinalByteMask = SetValidWay ? '1 : LineByteMask; // OR @@ -116,7 +118,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, // AND portion of distributed tag multiplexer assign TagWay = SelData ? ReadTag : '0; // AND part of AOMux - assign DirtyWay = SelTag & Dirty & ValidWay; + assign HitDirtyWay = Dirty & ValidWay; + assign DirtyWay = SelDirty & HitDirtyWay; assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]); ///////////////////////////////////////////////////////////////////////////////////////////// @@ -156,7 +159,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, if(CacheEn) begin ValidWay <= #1 ValidBits[CacheSet]; if(InvalidateCache) ValidBits <= #1 '0; // exclusion-tag: dcache invalidateway - else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CacheSet] <= #1 SetValidWay; + else if (SetValidEN) ValidBits[CacheSet] <= #1 SetValidWay; + else if (ClearValidEN) ValidBits[CacheSet] <= #1 '0; end end diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 054022106..78b0d15e8 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -66,6 +66,7 @@ module ahbcacheinterface #( input logic [LLEN-1:0] WriteDataM, // IEU write data for uncached store input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write input logic [2:0] Funct3, // Size of uncached memory operation + input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache // lsu/ifu interface input logic Stall, // Core pipeline is stalled @@ -80,6 +81,7 @@ module ahbcacheinterface #( logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA logic [AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s logic [AHBW-1:0] PreHWDATA; // AHB Address phase write data + logic [PA_BITS-1:0] PAdrZero; genvar index; @@ -91,10 +93,11 @@ module ahbcacheinterface #( .q(FetchBuffer[(index+1)*AHBW-1:index*AHBW])); end - mux2 #(PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR); + assign PAdrZero = BusCMOZero ? {PAdr[PA_BITS-1:$clog2(LINELEN/8)], {$clog2(LINELEN/8){1'b0}}} : PAdr; + mux2 #(PA_BITS) localadrmux(PAdrZero, CacheBusAdr, Cacheable, LocalHADDR); assign HADDR = ({{PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(AHBW/8)) + LocalHADDR; - mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); + mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable | BusCMOZero), .y(HSIZE)); // When AHBW is less than LLEN need extra muxes to select the subword from cache's read data. logic [AHBW-1:0] CacheReadDataWordAHB; @@ -119,6 +122,6 @@ module ahbcacheinterface #( buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm( .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, - .CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed, + .CacheBusRW, .BusCMOZero, .CacheBusAck, .BeatCount, .BeatCountDelayed, .HREADY, .HTRANS, .HWRITE, .HBURST); endmodule diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 4d1d475d8..45f66762f 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -42,6 +42,7 @@ module buscachefsm #( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write + input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache output logic BusStall, // Bus is busy with an in flight memory operation output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt @@ -75,6 +76,9 @@ module buscachefsm #( logic BeatCntEn; logic BeatCntReset; logic CacheAccess; + logic BusWrite; + + assign BusWrite = CacheBusRW[0] | BusCMOZero; always_ff @(posedge HCLK) if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; @@ -83,18 +87,18 @@ module buscachefsm #( always_comb begin case(CurrState) ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; - else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; else NextState = ADR_PHASE; DATA_PHASE: if(HREADY) NextState = MEM3; else NextState = DATA_PHASE; MEM3: if(Stall) NextState = MEM3; else NextState = ADR_PHASE; - CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + CACHE_FETCH: if(HREADY & FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_FETCH; - CACHE_WRITEBACK: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + CACHE_WRITEBACK: if(HREADY & FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_WRITEBACK; @@ -128,7 +132,7 @@ module buscachefsm #( (CacheAccess & FinalBeatCount & |CacheBusRW & HREADY & ~Flush) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |BeatCount) ? (`BURST_EN ? AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; - assign HWRITE = (BusRW[0] | CacheBusRW[0] & ~Flush) | (CurrState == CACHE_WRITEBACK & |BeatCount); + assign HWRITE = (BusRW[0] | BusWrite & ~Flush) | (CurrState == CACHE_WRITEBACK & |BeatCount); assign HBURST = `BURST_EN & ((|CacheBusRW & ~Flush) | (CacheAccess & |BeatCount)) ? LocalBurstType : 3'b0; always_comb begin @@ -142,8 +146,8 @@ module buscachefsm #( end // communication to cache - assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount); - assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | CacheBusRW[0])) | + assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount & ~BusCMOZero); + assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | BusWrite)) | (CurrState == DATA_PHASE & BusRW[0]) | (CurrState == CACHE_WRITEBACK) | (CurrState == CACHE_FETCH); diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index 617a779ff..c298dba63 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -33,7 +33,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) ); // Core Memory - (*rom_style="block" *) logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; + logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; // dh 10/30/23 ROM macros are presently commented out // because they don't point to a generated ROM diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index a489b7f86..41be3941a 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -357,8 +357,10 @@ module controller import cvw::*; #(parameter cvw_t P) ( // Cache Management instructions always_comb begin CMOpD = 4'b0000; // default: not a cbo instruction - if ((P.ZICBOM_SUPPORTED | P.ZICBOZ_SUPPORTED) & CMOD) begin + if ((P.ZICBOZ_SUPPORTED) & CMOD) begin CMOpD[3] = (InstrD[31:20] == 12'd4); // cbo.zero + end + if ((P.ZICBOM_SUPPORTED) & CMOD) begin CMOpD[2] = (InstrD[31:20] == 12'd2); // cbo.clean CMOpD[1] = (InstrD[31:20] == 12'd1) | ((InstrD[31:20] == 12'd0) & (ENVCFG_CBE[1:0] == 2'b01)); // cbo.flush CMOpD[0] = (InstrD[31:20] == 12'd0) & (ENVCFG_CBE[1:0] == 2'b11); // cbo.inval @@ -425,6 +427,5 @@ module controller import cvw::*; #(parameter cvw_t P) ( // a cache cannot read or write immediately after a write // atomic operations are also detected as MemRWD[1] //assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED))); - // *** RT: Modify for ZICBOZ - assign StoreStallD = (MemRWE[0] | (|CMOpE & P.ZICBOM_SUPPORTED)) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | (|CMOpD & P.ZICBOM_SUPPORTED))); + assign StoreStallD = (MemRWE[0] | (|CMOpE)) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | (|CMOpD))); endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 831d9e6bb..4a02848b5 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -182,7 +182,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), - .UpdateDA(InstrUpdateDAF), + .UpdateDA(InstrUpdateDAF), .CMOp(4'b0), .AtomicAccessM(1'b0),.ExecuteAccessF(1'b1), .WriteAccessM(1'b0), .ReadAccessM(1'b0), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); @@ -252,7 +252,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( ahbcacheinterface #(P.AHBW, P.LLEN, P.PA_BITS, WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1) ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), .HRDATA, - .Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), + .Flush(FlushD), .CacheBusRW, .BusCMOZero(1'b0), .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), .Funct3(3'b010), .HADDR(IFUHADDR), .HREADY(IFUHREADY), .HWRITE(IFUHWRITE), .CacheBusAdr(ICacheBusAdr), .BeatCount(), .Cacheable(CacheableF), .SelBusBeat(), .WriteDataM('0), .CacheBusAck(ICacheBusAck), .HWDATA(), .CacheableOrFlushCacheM(1'b0), .CacheReadDataWordM('0), diff --git a/src/lsu/align.sv b/src/lsu/align.sv index ce704a316..d3ca5ecc5 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -37,11 +37,8 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation input logic [1:0] MemRWM, - input logic CacheableM, input logic [P.LLEN*2-1:0] DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched - input logic DTLBMissM, // ITLB miss, ignore memory request - input logic DataUpdateDAM, // ITLB miss, ignore memory request input logic SelHPTW, input logic [(P.LLEN-1)/8:0] ByteMaskM, @@ -54,7 +51,6 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [1:0] MemRWSpillM, output logic SelStoreDelay, //*** this is bad. really don't like moving this outside output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic SpillStallM); @@ -65,26 +61,19 @@ module align import cvw::*; #(parameter cvw_t P) ( typedef enum logic [1:0] {STATE_READY, STATE_SPILL, STATE_STORE_DELAY} statetype; statetype CurrState, NextState; - logic TakeSpillM; - logic SpillM; + logic ValidSpillM; logic SelSpillM; logic SpillSaveM; - logic [P.LLEN-1:0] ReadDataWordFirstHalfM; + logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; - logic [P.XLEN-1:0] IEUAdrIncrementM; + logic [P.XLEN-1:0] IEUAdrIncrementM; - logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; - logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; - logic SaveByteMask; - logic HalfMisalignedM, WordMisalignedM; - logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; - logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; - logic HalfSpillM, WordSpillM; - logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; - logic ValidAccess; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; + logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -101,40 +90,26 @@ module align import cvw::*; #(parameter cvw_t P) ( // 2) offset // 3) access location within the cacheline - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; - + // compute misalignement always_comb begin case (Funct3M[1:0]) 2'b00: AccessByteOffsetM = '0; // byte access - 2'b01: AccessByteOffsetM = {2'b00, ByteOffsetM[0]}; // half access - 2'b10: AccessByteOffsetM = {1'b0, ByteOffsetM[1:0]}; // word access - 2'b11: AccessByteOffsetM = ByteOffsetM; // double access - default: AccessByteOffsetM = ByteOffsetM; + 2'b01: AccessByteOffsetM = {2'b00, IEUAdrM[0]}; // half access + 2'b10: AccessByteOffsetM = {1'b0, IEUAdrM[1:0]}; // word access + 2'b11: AccessByteOffsetM = IEUAdrM[2:0]; // double access + default: AccessByteOffsetM = IEUAdrM[2:0]; + endcase + case (Funct3M[1:0]) + 2'b00: PotentialSpillM = '0; // byte access + 2'b01: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:1] == '1; // half access + 2'b10: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:2] == '1; // word access + 2'b11: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:3] == '1; // double access + default: PotentialSpillM = '0; endcase end - - // compute misalignement - assign HalfMisalignedM = (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; - assign WordMisalignedM = (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; - assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & HalfMisalignedM; - assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & WordMisalignedM; - assign ValidAccess = (|MemRWM) & ~SelHPTW; - - if(P.LLEN == 64) begin - logic DoubleSpillM; - logic DoubleMisalignedM; - assign DoubleMisalignedM = (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; - assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & DoubleMisalignedM; - assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM | DoubleMisalignedM); - assign SpillM = ValidAccess & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); - end else begin - assign SpillM = ValidAccess & CacheableM & (HalfSpillM | WordSpillM); - assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM); - end + assign MisalignedM = (|MemRWM) & (AccessByteOffsetM != '0); - // align by shifting - // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); + assign ValidSpillM = MisalignedM & PotentialSpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -142,8 +117,8 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillM & ~MemRWM[0]) NextState = STATE_SPILL; - else if(TakeSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; + STATE_READY: if (ValidSpillM & ~MemRWM[0]) NextState = STATE_SPILL; + else if(ValidSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; @@ -153,12 +128,10 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign SelSpillM = (CurrState == STATE_SPILL | CurrState == STATE_STORE_DELAY); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); - assign SaveByteMask = (CurrState == STATE_READY & TakeSpillM); - assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; + assign SelSpillE = (CurrState == STATE_READY & ValidSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); + assign SpillSaveM = (CurrState == STATE_READY) & ValidSpillM & ~FlushM; assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; - mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); //////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled data @@ -173,21 +146,20 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * AccessByteOffsetM : '0); + assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * AccessByteOffsetM : '0); + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; - mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill + mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM}, // no spill {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half - {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, // spill, second half + {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, // spill, second half {SelSpillM, SelSpillE}, ByteMaskSpillM); - flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SaveByteMask, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); - mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 5c21d7ecd..2b8a65ac6 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -148,7 +148,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle logic IgnoreRequest; // On FlushM or TLB miss ignore memory operation logic SelDTIM; // Select DTIM rather than bus or D$ - + logic [P.XLEN-1:0] WriteDataZM; + ///////////////////////////////////////////////////////////////////////////////////////////// // Pipeline for IEUAdr E to M // Zero-extend address to 34 bits for XLEN=32 @@ -158,10 +159,10 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin : ziccslm_align logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, - .MemRWM, .CacheableM, - .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .SelHPTW, + .MemRWM, + .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .MemRWSpillM, .DCacheReadDataWordSpillM, .SpillStallM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -176,6 +177,12 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign {SpillStallM, SelStoreDelay} = '0; end + if(P.ZICBOZ_SUPPORTED) begin : cboz + mux2 #(P.XLEN) writedatacbozmux(WriteDataM, '0, CMOpM[3], WriteDataZM); + end else begin : cboz + assign WriteDataZM = WriteDataM; + end + ///////////////////////////////////////////////////////////////////////////////////////////// // HPTW (only needed if VM supported) // MMU include PMP and is needed if any privileged supported @@ -187,7 +194,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .FlushW, .DCacheStallM, .SATP_REGW, .PCSpillF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_HADE, .PrivilegeModeW, .ReadDataM(ReadDataM[P.XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN - .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, + .WriteDataM(WriteDataZM), .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, .IEUAdrExtM, .PTE, .IHWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IHAdrM, .HPTWStall, .SelHPTW, .IgnoreRequestTLB, .LSULoadAccessFaultM, .LSUStoreAmoAccessFaultM, @@ -198,7 +205,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; - assign IHWriteDataM = WriteDataM; + assign IHWriteDataM = WriteDataZM; assign LoadAccessFaultM = LSULoadAccessFaultM; assign StoreAmoAccessFaultM = LSUStoreAmoAccessFaultM; assign {HPTWStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; @@ -221,7 +228,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic DisableTranslation; // During HPTW walk or D$ flush disable virtual memory address translation logic WriteAccessM; assign DisableTranslation = SelHPTW | FlushDCacheM; - assign WriteAccessM = PreLSURWM[0] | (|CMOpM); + assign WriteAccessM = PreLSURWM[0]; mmu #(.P(P), .TLB_ENTRIES(P.DTLB_ENTRIES), .IMMU(0)) dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_HADE, .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), @@ -231,7 +238,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM, .StoreAmoPageFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. - .UpdateDA(DataUpdateDAM), + .UpdateDA(DataUpdateDAM), .CMOp(CMOpM), .AtomicAccessM(|LSUAtomicM), .ExecuteAccessF(1'b0), .WriteAccessM, .ReadAccessM(PreLSURWM[1]), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); @@ -294,7 +301,13 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic FlushDCache; // Suppress d cache flush if there is an ITLB miss. logic CacheStall; logic [1:0] CacheBusRWTemp; - + logic BusCMOZero; + + if(P.ZICBOZ_SUPPORTED) begin + assign BusCMOZero = CMOpM[3] & ~CacheableM; + end else begin + assign BusCMOZero = '0; + end assign BusRW = ~CacheableM & ~SelDTIM ? LSURWM : '0; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; assign CacheRWM = CacheableM & ~SelDTIM ? LSURWM : '0; @@ -321,7 +334,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), - .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheableOrFlushCacheM, + .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM)); diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index 3ee9c23d5..576bb21b8 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -30,7 +30,7 @@ module adrdecs import cvw::*; #(parameter cvw_t P) ( input logic [P.PA_BITS-1:0] PhysicalAddress, - input logic AccessRW, AccessRX, AccessRWX, + input logic AccessRW, AccessRX, AccessRWXC, input logic [1:0] Size, output logic [11:0] SelRegions ); @@ -39,9 +39,9 @@ module adrdecs import cvw::*; #(parameter cvw_t P) ( // Determine which region of physical memory (if any) is being accessed adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[11]); adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[10]); - adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[9]); + adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWXC, Size, SUPPORTED_SIZE, SelRegions[9]); adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[8]); - adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[7]); + adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWXC, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec #(P.PA_BITS) clintdec(PhysicalAddress, P.CLINT_BASE[P.PA_BITS-1:0], P.CLINT_RANGE[P.PA_BITS-1:0], P.CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[6]); adrdec #(P.PA_BITS) gpiodec(PhysicalAddress, P.GPIO_BASE[P.PA_BITS-1:0], P.GPIO_RANGE[P.PA_BITS-1:0], P.GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[5]); adrdec #(P.PA_BITS) uartdec(PhysicalAddress, P.UART_BASE[P.PA_BITS-1:0], P.UART_RANGE[P.PA_BITS-1:0], P.UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[4]); diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index a497b6da7..e8d87503c 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -55,6 +55,7 @@ module mmu import cvw::*; #(parameter cvw_t P, output logic UpdateDA, // page fault due to setting dirty or access bit output logic LoadMisalignedFaultM, StoreAmoMisalignedFaultM, // misaligned fault sources // PMA checker signals + input logic [3:0] CMOp, // Cache management instructions input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // access type input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP addresses @@ -84,7 +85,7 @@ module mmu import cvw::*; #(parameter cvw_t P, .SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .SATP_ASID(SATP_REGW[P.ASID_BASE+P.ASID_BITS-1:P.ASID_BASE]), .VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_HADE, - .PrivilegeModeW, .ReadAccess, .WriteAccess, + .PrivilegeModeW, .ReadAccess, .WriteAccess, .CMOp, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, .Translate, .TLBPageFault, .UpdateDA, .PBMemoryType); @@ -106,7 +107,7 @@ module mmu import cvw::*; #(parameter cvw_t P, // Check physical memory accesses /////////////////////////////////////////// - pmachecker #(P) pmachecker(.PhysicalAddress, .Size, + pmachecker #(P) pmachecker(.PhysicalAddress, .Size, .CMOp, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PBMemoryType, .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); @@ -114,7 +115,7 @@ module mmu import cvw::*; #(parameter cvw_t P, if (P.PMP_ENTRIES > 0) begin : pmp pmpchecker #(P) pmpchecker(.PhysicalAddress, .PrivilegeModeW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .ExecuteAccessF, .WriteAccessM, .ReadAccessM, + .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .CMOp, .PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM); end else begin assign PMPInstrAccessFaultF = 0; diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 7aa20fc2f..3c23d3623 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -31,6 +31,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( input logic [P.PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, + input logic [3:0] CMOp, input logic AtomicAccessM, // Atomic access input logic ExecuteAccessF, // Execute access input logic WriteAccessM, // Write access @@ -43,18 +44,18 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( ); logic PMAAccessFault; - logic AccessRW, AccessRWX, AccessRX; + logic AccessRW, AccessRWXC, AccessRX; logic [11:0] SelRegions; logic AtomicAllowed; logic CacheableRegion, IdempotentRegion; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWX = ReadAccessM | WriteAccessM | ExecuteAccessF; + assign AccessRWXC = ReadAccessM | WriteAccessM | ExecuteAccessF | (|CMOp); assign AccessRX = ReadAccessM | ExecuteAccessF; // Determine which region of physical memory (if any) is being accessed - adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions); + adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXC, Size, SelRegions); // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[9] | SelRegions[8] | SelRegions[7]; // exclusion-tag: unused-cachable @@ -71,8 +72,8 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( assign SelTIM = SelRegions[11] | SelRegions[10]; // exclusion-tag: unused-idempotent // Detect access faults - assign PMAAccessFault = (SelRegions[0]) & AccessRWX | AtomicAccessM & ~AtomicAllowed; + assign PMAAccessFault = (SelRegions[0]) & AccessRWXC | AtomicAccessM & ~AtomicAllowed; assign PMAInstrAccessFaultF = ExecuteAccessF & PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM & PMAAccessFault; - assign PMAStoreAmoAccessFaultM = WriteAccessM & PMAAccessFault; + assign PMAStoreAmoAccessFaultM = (WriteAccessM | (|CMOp)) & PMAAccessFault; endmodule diff --git a/src/mmu/pmpchecker.sv b/src/mmu/pmpchecker.sv index 89c22c486..ddd7e72b0 100644 --- a/src/mmu/pmpchecker.sv +++ b/src/mmu/pmpchecker.sv @@ -42,6 +42,7 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, + input logic [3:0] CMOp, output logic PMPInstrAccessFaultF, output logic PMPLoadAccessFaultM, output logic PMPStoreAmoAccessFaultM @@ -53,6 +54,8 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( logic [P.PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. logic [P.PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set logic [P.PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + logic PMPCMOAccessFault, PMPCBOMAccessFault, PMPCBOZAccessFault; + if (P.PMP_ENTRIES > 0) begin: pmp // prevent complaints about array of no elements when PMP_ENTRIES = 0 pmpadrdec #(P) pmpadrdecs[P.PMP_ENTRIES-1:0]( @@ -69,7 +72,11 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( // Only enforce PMP checking for S and U modes or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW != P.M_MODE) | (|(L & FirstMatch)); // *** switch to this logic when PMP is initialized for non-machine mode + assign PMPCBOMAccessFault = EnforcePMP & (|CMOp[2:0]) & ~|((R|W) & FirstMatch) ; + assign PMPCBOZAccessFault = EnforcePMP & CMOp[3] & ~|(W & FirstMatch) ; + assign PMPCMOAccessFault = PMPCBOZAccessFault | PMPCBOMAccessFault; + assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; - assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; + assign PMPStoreAmoAccessFaultM = (EnforcePMP & WriteAccessM & ~|(W & FirstMatch)) | PMPCMOAccessFault; assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ; endmodule diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv index 861e721b6..a5f95c70d 100644 --- a/src/mmu/tlb/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -62,6 +62,7 @@ module tlb import cvw::*; #(parameter cvw_t P, input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic ReadAccess, input logic WriteAccess, + input logic [3:0] CMOp, input logic DisableTranslation, input logic [P.XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) input logic [P.XLEN-1:0] PTE, // page table entry to write @@ -106,7 +107,7 @@ module tlb import cvw::*; #(parameter cvw_t P, assign VPN = VAdr[P.VPN_BITS+11:12]; tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_HADE, - .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, + .PrivilegeModeW, .ReadAccess, .WriteAccess, .CMOp, .DisableTranslation, .TLBFlush, .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, .UpdateDA, .SV39Mode, .Translate, .PTE_N, .PBMemoryType); diff --git a/src/mmu/tlb/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv index 31312f767..dd296b892 100644 --- a/src/mmu/tlb/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -35,6 +35,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( input logic ENVCFG_HADE, // HPTW A/D Update enable input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic ReadAccess, WriteAccess, + input logic [3:0] CMOp, input logic DisableTranslation, input logic TLBFlush, // Invalidate all TLB entries input logic [11:0] PTEAccessBits, @@ -67,7 +68,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( assign Translate = (SATP_MODE != P.NO_TRANSLATE[P.SVMODE_BITS-1:0]) & (EffectivePrivilegeMode != P.M_MODE) & ~DisableTranslation; // Determine whether TLB is being used - assign TLBAccess = ReadAccess | WriteAccess; + assign TLBAccess = ReadAccess | WriteAccess | (|CMOp); // Check that upper bits are legal (all 0s or all 1s) vm64check #(P) vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); @@ -98,6 +99,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( assign InvalidAccess = ~PTE_X; end else begin:dtlb // Data TLB fault checking logic InvalidRead, InvalidWrite; + logic InvalidCBOM, InvalidCBOZ; // User mode may only load/store from user mode pages, and supervisor mode // may only access user mode pages when STATUS_SUM is low. @@ -110,7 +112,9 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( // Check for write error. Writes are invalid when the page's write bit is // low. assign InvalidWrite = WriteAccess & ~PTE_W; - assign InvalidAccess = InvalidRead | InvalidWrite; + assign InvalidCBOM = (|CMOp[2:0]) & (~PTE_W | (~PTE_R & (~STATUS_MXR | ~PTE_X))); + assign InvalidCBOZ = CMOp[3] & ~PTE_W; + assign InvalidAccess = InvalidRead | InvalidWrite | InvalidCBOM | InvalidCBOZ; assign PreUpdateDA = ~PTE_A | WriteAccess & ~PTE_D; end diff --git a/wallyriscvTopAll.png b/wallyriscvTopAll.png index 4f675507f..a0d0e7cce 100644 Binary files a/wallyriscvTopAll.png and b/wallyriscvTopAll.png differ