From 3135e1202e999373d1046bbc8fc8ea446c1506af Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Thu, 7 Jul 2022 16:08:21 +0000 Subject: [PATCH 01/11] plot tuning, fo4 axis --- addins/riscv-arch-test | 2 +- synthDC/extractSummary.py | 113 +++++++++++++++++++++++++++++--------- synthDC/wallySynth.py | 12 ++-- 3 files changed, 94 insertions(+), 33 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index be67c99b..307c77b2 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 +Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 5f0d5015..71e21cfe 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -8,6 +8,9 @@ from matplotlib.cbook import flatten import matplotlib.pyplot as plt import matplotlib.lines as lines from wallySynth import testFreq +import numpy as np +from ppa.ppaAnalyze import noOutliers +from matplotlib import ticker def synthsintocsv(): @@ -27,7 +30,7 @@ def synthsintocsv(): writer.writerow(['Width', 'Config', 'Special', 'Tech', 'Target Freq', 'Delay', 'Area']) for oneSynth in allSynths: - descrip = specReg.findall(oneSynth) #[30:] + descrip = specReg.findall(oneSynth) width = descrip[2][:4] config = descrip[2][4:] if descrip[3][-2:] == 'nm': @@ -71,6 +74,7 @@ def synthsfromcsv(filename): allSynths[i] = Synth(*allSynths[i]) return allSynths + def freqPlot(tech, width, config): ''' plots delay, area for syntheses with specified tech, module, width ''' @@ -83,16 +87,24 @@ def freqPlot(tech, width, config): delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] - f, (ax1, ax2) = plt.subplots(2, 1, sharex=True) + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) + allFreqs = list(flatten(freqsL)) + if allFreqs != []: + median = np.median(allFreqs) + else: + median = 0 for ind in [0,1]: areas = areasL[ind] delays = delaysL[ind] freqs = freqsL[ind] + freqs, delays, areas = noOutliers(median, freqs, delays, areas) c = 'blue' if ind else 'green' - ax1.scatter(freqs, delays, color=c) - ax2.scatter(freqs, areas, color=c) + targs = [1000/f for f in freqs] + + ax1.scatter(targs, delays, color=c) + ax2.scatter(targs, areas, color=c) freqs = list(flatten(freqsL)) delays = list(flatten(delaysL)) @@ -104,20 +116,25 @@ def freqPlot(tech, width, config): ax1.legend(handles=legend_elements) ytop = ax2.get_ylim()[1] ax2.set_ylim(ymin=0, ymax=1.1*ytop) - ax2.set_xlabel("Target Freq (MHz)") - ax1.set_ylabel('Delay (ns)') + ax2.set_xlabel("Target Cycle Time (ns)") + ax1.set_ylabel('Cycle Time Achieved (ns)') ax2.set_ylabel('Area (sq microns)') - ax1.set_title(tech + ' ' + width +config) + ax1.set_title(tech + ' ' + width + config) + ax2.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}')) + addFO4axis(fig, ax1, tech) + plt.savefig('./plots/wally/freqSweep_' + tech + '_' + width + config + '.png') # plt.show() -def areaDelay(tech, freq, width=None, config=None, special=None): + + +def areaDelay(tech, fig=None, ax=None, freq=None, width=None, config=None, norm=False): delays, areas, labels = ([] for i in range(3)) for oneSynth in allSynths: if (width==None) or (width == oneSynth.width): if (tech == oneSynth.tech) & (freq == oneSynth.freq): - if (special != None) & (oneSynth.special == special): + if (config == None) & (oneSynth.special == 'FPUoff'): #fix delays += [oneSynth.delay] areas += [oneSynth.area] labels += [oneSynth.width + oneSynth.config] @@ -125,44 +142,88 @@ def areaDelay(tech, freq, width=None, config=None, special=None): delays += [oneSynth.delay] areas += [oneSynth.area] labels += [oneSynth.special] - # else: - # delays += [oneSynth.delay] - # areas += [oneSynth.area] - # labels += [oneSynth.config + '_' + oneSynth.special] if width == None: width = '' + if (fig == None) or (ax == None): + fig, (ax) = plt.subplots(1, 1) + ax.ticklabel_format(useOffset=False, style='plain') + plt.subplots_adjust(left=0.18) + + if norm: + delays = [d/techdict[tech][0] for d in delays] + areas = [a/techdict[tech][1] for a in areas] - f, (ax1) = plt.subplots(1, 1) plt.scatter(delays, areas) - plt.xlabel('Delay (ns)') + plt.xlabel('Cycle time (ns)') plt.ylabel('Area (sq microns)') - ytop = ax1.get_ylim()[1] + ytop = ax.get_ylim()[1] plt.ylim(ymin=0, ymax=1.1*ytop) titleStr = tech + ' ' + width saveStr = tech + '_' + width if config: titleStr += config saveStr = saveStr + config + '_versions_' - if (special != None): - titleStr += special + if (config == None): saveStr = saveStr + '_origConfigs_' saveStr += str(freq) - titleStr = titleStr + ' (target freq: ' + str(freq) + ')' + titleStr = titleStr plt.title(titleStr) + + ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}')) for i in range(len(labels)): plt.annotate(labels[i], (delays[i], areas[i]), textcoords="offset points", xytext=(0,10), ha='center') - plt.savefig('./plots/wally/areaDelay_' + saveStr + '.png') + # addFO4axis(fig, ax1, tech) -# ending freq in 42 means fpu was turned off manually + plt.savefig('./plots/wally/areaDelay_' + saveStr + '.png') + +def normAreaDelay(): + fig2, (ax) = plt.subplots(1, 1) + areaDelay('sky90', fig=fig2, ax=ax, freq=testFreq[0], norm=True) + areaDelay('tsmc28', fig=fig2, ax=ax, freq=testFreq[1], norm=True) + ax.set_title('Normalized Area & Cycle Time by Configuration') + ax.set_xlabel('Cycle Time (FO4)') + ax.set_ylabel('Area (add32)') + fullLeg = [lines.Line2D([0], [0], color='royalblue', label='tsmc28')] + fullLeg += [lines.Line2D([0], [0], color='orange', label='sky90')] + ax.legend(handles = fullLeg, loc='upper left') + plt.savefig('./plots/wally/normAreaDelay.png') + +def addFO4axis(fig, ax, tech): + fo4 = techdict[tech][0] + + ax3 = fig.add_axes((0.125,0.14,0.775,0.0)) + ax3.yaxis.set_visible(False) # hide the yaxis + + fo4Range = [x/fo4 for x in ax.get_xlim()] + dif = fo4Range[1] - fo4Range[0] + for n in [0.02, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]: + d = dif/n + if d > 3 and d < 10: + r = [int(x/n) for x in fo4Range] + nsTicks = [round(x*n, 2) for x in range(r[0], r[1]+1)] + break + new_tick_locations = [fo4*float(x) for x in nsTicks] + + ax3.set_xticks(new_tick_locations) + ax3.set_xticklabels(nsTicks) + ax3.set_xlim(ax.get_xlim()) + ax3.set_xlabel("FO4 delays") + plt.subplots_adjust(left=0.125, bottom=0.25, right=0.9, top=0.9) + if __name__ == '__main__': - synthsintocsv() + + techdict = {'sky90': [43.2e-3, 1440.600027], 'tsmc28': [12.2e-3, 209.286002]} + + # synthsintocsv() synthsfromcsv('Summary.csv') freqPlot('tsmc28', 'rv32', 'e') freqPlot('sky90', 'rv32', 'e') - areaDelay('tsmc28', testFreq[1], width= 'rv64', config='gc') - areaDelay('sky90', testFreq[0], width='rv64', config='gc') - areaDelay('tsmc28', testFreq[1], special='FPUoff') - areaDelay('sky90', testFreq[0], special='FPUoff') \ No newline at end of file + areaDelay('tsmc28', freq=testFreq[1], width= 'rv64', config='gc') + areaDelay('sky90', freq=testFreq[0], width='rv64', config='gc') + areaDelay('tsmc28', freq=testFreq[1]) + areaDelay('sky90', freq=testFreq[0]) + + # normAreaDelay() diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index 66f09e26..3195d850 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -9,7 +9,7 @@ def runCommand(config, tech, freq): commands = ["make fresh", "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq)] for c in commands: subprocess.Popen(c, shell=True) - time.sleep(60) + # time.sleep(60) fix only do this when diff configs testFreq = [3000, 10000] @@ -26,14 +26,14 @@ if __name__ == '__main__': tech = techs[i] sc = sweepCenter[i] f = testFreq[i] - # for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep - # synthsToRun += [['rv32e', tech, freq]] + for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep + synthsToRun += [['rv32e', tech, freq]] # for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64ic', 'rv32e']: # configs # config = config + '_FPUoff' # while FPU under rennovation # synthsToRun += [[config, tech, f]] - for mod in ['noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations - config = 'rv64gc_' + mod - synthsToRun += [[config, tech, f]] + # for mod in ['noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations + # config = 'rv64gc_' + mod + # synthsToRun += [[config, tech, f]] for x in synthsToRun: pool.starmap(runCommand, [x]) \ No newline at end of file From 10ebcd1f9544b06550080c8f135b143418fb4764 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 7 Jul 2022 16:42:30 -0700 Subject: [PATCH 02/11] CoreMark makefile tuning --- benchmarks/coremark/Makefile | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 2a35f1e8..1178a921 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -7,19 +7,28 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(cmbase)/core_matrix.c $(cmbase)/core_state.c $(cmbase)/core_util.c \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c +ABI = lp64 +#$(if $(findstring 64, $(XLEN)), lp64, ilp32) +PORT_CFLAGS = -g -march=$(XLEN)im -mabi=$(ABI) -static -mcmodel=medlow -mtune=sifive-3-series \ + -O3 -falign-functions=16 -funroll-all-loops \ + -finline-functions -falign-jumps=4 \ + -nostdlib -nostartfiles -ffreestanding -mstrict-align \ + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \ + -DPERFORMANCE_RUN=1 + $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv riscv64-unknown-elf-objdump -D $< > $<.elf.objdump riscv64-unknown-elf-elf2hex --bit-width $(XLEN) --input $< --output $@ extractFunctionRadix.sh $<.elf.objdump - (cd ../../pipelined/regression && (vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" > $(work_dir)/coremark.sim.log)) + (cd ../../pipelined/regression && (vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log)) cd ../../benchmarks/coremark/ $(work_dir)/coremark.bare.riscv: $(sources) Makefile # make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta" # These flags were used by WD on CoreMark # make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " - make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im -mabi=lp$(XLEN) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " + make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " # -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error mkdir -p $(work_dir) mv $(cmbase)/coremark.bare.riscv $(work_dir) From 8e6aa12b2b264c9b158aed7e98193266f3c4a8e9 Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Fri, 8 Jul 2022 08:01:10 +0000 Subject: [PATCH 03/11] restore flatten --- synthDC/scripts/synth.tcl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 1e659107..36a72302 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -105,8 +105,7 @@ set_critical_range [expr $my_period*0.05] $current_design # Partitioning - flatten or hierarchically synthesize if { $maxopt == 1 } { - ungroup -all -simple_names - # -flatten + ungroup -all -simple_names -flatten } # Set input pins except clock From 8f0f6261407de38d8b94028cd091324899e3e866 Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Fri, 8 Jul 2022 08:02:11 +0000 Subject: [PATCH 04/11] made wally synth flow shell based --- synthDC/Makefile | 15 +++++++------ synthDC/runAllSynths.sh | 34 +++++++++++++++++++++------- synthDC/wallySynth.py | 49 +++++++++++++++++++++++------------------ 3 files changed, 62 insertions(+), 36 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index bef47e04..97cb1ca5 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -6,9 +6,10 @@ NAME := synth # defaults export DESIGN ?= wallypipelinedcore export FREQ ?= 3000 -export CONFIG ?= rv32e_FPUoff -TITLE = shreya -# sky130 and sky90 presently supported +export CONFIG ?= rv32e +TITLE = + +# tsmc28, sky130, and sky90 presently supported export TECH ?= sky90 # MAXCORES allows parallel compilation, which is faster but less CPU-efficient # Avoid when doing sweeps of many optimization points in parallel @@ -111,7 +112,9 @@ $(CONFIGFILESTRIM): make synth DESIGN=wallypipelinedcore CONFIG=$@ TECH=sky90 FREQ=3000 MAXCORES=1 -synth: clean +synth: + rm -f hdl/* + rm -rf WORK @echo "DC Synthesis" @mkdir -p hdl/ @mkdir -p $(OUTPUTDIR) @@ -124,6 +127,7 @@ endif dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out clean: +# fix should make del be here rm -rf alib-52 WORK analyzed $(NAME).out rm -f hdl/* rm -f default.svf @@ -133,9 +137,6 @@ clean: rm -f Synopsys_stack_trace_*.txt rm -f crte_*.txt -fresh: - rm -rf WORK - rm -f hdl/* diff --git a/synthDC/runAllSynths.sh b/synthDC/runAllSynths.sh index bd3c036a..24ac52eb 100755 --- a/synthDC/runAllSynths.sh +++ b/synthDC/runAllSynths.sh @@ -1,11 +1,29 @@ #!/usr/bin/bash +# Madeleine Masser-Frye mmasserfrye@hmc.edu July 2022 -make clean -# mv runs runArchive/$(date +"%Y_%m_%d_%I_%M_%p") -# mv newRuns runs -# mkdir newRuns -make del -make copy -make configs +helpFunction() +{ echo "" + echo "Usage: $0 " + echo -e "\t--configs Synthesizes wally with configurations 32e, 32ic, 64ic, 32gc, and 64gc" + echo -e "\t--freqs NUM Synthesizes rv32e with target frequencies at NUM MHz and +/- 2, 4, 6, 8 %" + echo -e "\t--features Synthesizes rv64gc versions FPUoff, noMulDiv, noPriv, PMP0, PMP16" + exit 1 # Exit script after printing help +} -./wallySynth.py \ No newline at end of file +VALID_ARGS=$(getopt -o cft: --long configs,features,freqs: -- "$@") + +eval set -- "$VALID_ARGS" +unset VALID_ARGS + +if [[ $1 == "--" ]]; + then helpFunction +elif [[ $1 == "--freqs" ]] && [[ ! $2 =~ ^[[:digit:]]+$ ]] + then echo "Argument must be an integer, target frequnecy is in MHz" +else + make clean + make del + make copy + make configs + ./wallySynth.py $1 $2 + ./extractSummary.py +fi \ No newline at end of file diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index 3195d850..cb79a255 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -4,36 +4,43 @@ import subprocess from multiprocessing import Pool import time +import sys -def runCommand(config, tech, freq): - commands = ["make fresh", "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq)] - for c in commands: - subprocess.Popen(c, shell=True) - # time.sleep(60) fix only do this when diff configs +def runSynth(config, tech, freq): + global pool + command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=1 MAXCORES=1".format(config, tech, freq) + pool.map(mask, [command]) + +def mask(command): + subprocess.Popen(command, shell=True) testFreq = [3000, 10000] if __name__ == '__main__': + i = 0 techs = ['sky90', 'tsmc28'] - sweepCenter = [870, 2940] synthsToRun = [] - + tech = techs[i] + freq = testFreq[i] arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8] pool = Pool() + staggerPeriod = 60 #seconds - for i in [0]: - tech = techs[i] - sc = sweepCenter[i] - f = testFreq[i] + typeToRun = sys.argv[1] + + if 'configs' in typeToRun: + for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64ic', 'rv32e']: # configs + config = config + '_orig' # until memory integrated + runSynth(config, tech, freq) + time.sleep(staggerPeriod) + elif 'features' in typeToRun: + for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations + config = 'rv64gc_' + mod + runSynth(config, tech, freq) + time.sleep(staggerPeriod) + elif 'freqs' in typeToRun: + sc = int(sys.argv[2]) + config = 'rv32e' for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep - synthsToRun += [['rv32e', tech, freq]] - # for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64ic', 'rv32e']: # configs - # config = config + '_FPUoff' # while FPU under rennovation - # synthsToRun += [[config, tech, f]] - # for mod in ['noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations - # config = 'rv64gc_' + mod - # synthsToRun += [[config, tech, f]] - - for x in synthsToRun: - pool.starmap(runCommand, [x]) \ No newline at end of file + runSynth(config, tech, freq) \ No newline at end of file From 9b6d9666c51e13d8ad01326b05ef2c29158835d4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 8 Jul 2022 08:43:24 +0000 Subject: [PATCH 05/11] Removed unused swbytemask from CLINT --- pipelined/src/uncore/clint_apb.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipelined/src/uncore/clint_apb.sv b/pipelined/src/uncore/clint_apb.sv index fb704cf5..a05bf4c2 100644 --- a/pipelined/src/uncore/clint_apb.sv +++ b/pipelined/src/uncore/clint_apb.sv @@ -58,8 +58,6 @@ module clint_apb ( if (`XLEN==64) assign #2 entry = {PADDR[15:3], 3'b000}; else assign #2 entry = {PADDR[15:2], 2'b00}; - //swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(entry[2:0]), .ByteMask(PSTRB)); - // DH 2/20/21: Eventually allow MTIME to run off a separate clock // This will require synchronizing MTIME to the system clock // before it is read or compared to MTIMECMP. From 3f9e6622010f26fd542342744ad742d9c8ae446f Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 8 Jul 2022 08:44:37 +0000 Subject: [PATCH 06/11] Removed subwordwrite mention in cache because sww is needed to replicate data across byte enables --- pipelined/src/cache/cacheway.sv | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index ac1e26e8..a5f68653 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -105,7 +105,6 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // Data Array ///////////////////////////////////////////////////////////////////////////////////////////// - // *** Potential optimization: if byte write enables are available, could remove subwordwrites genvar words; for(words = 0; words < LINELEN/`XLEN; words++) begin: word sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr), From 1ce0975366c46da0210f14b7fb9d23d24dc570d3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 8 Jul 2022 08:45:21 +0000 Subject: [PATCH 07/11] Adjusting byte writes to RAM --- pipelined/src/uncore/ram.sv | 7 ++++- pipelined/src/uncore/uncore.sv | 51 +++++----------------------------- 2 files changed, 13 insertions(+), 45 deletions(-) diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index 7ef023fe..39094142 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -38,6 +38,7 @@ module ram #(parameter BASE=0, RANGE = 65535) ( input logic HREADY, input logic [1:0] HTRANS, input logic [`XLEN-1:0] HWDATA, + input logic [`XLEN/8-1:0] HWSTRB, input logic [3:0] HSIZED, output logic [`XLEN-1:0] HREADRam, output logic HRESPRam, HREADYRam @@ -74,8 +75,12 @@ module ram #(parameter BASE=0, RANGE = 65535) ( // *** it shoudl be centralized and sent over HWSTRB swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask)); + always @(posedge HCLK) begin + assert (ByteMask == HWSTRB | ~memwriteD) else $display("HSIZED %b HADDRD %b ByteMask %b HWSTRB %b\n", HSIZED[1:0], HADDRD[2:0], ByteMask, HWSTRB); + end + // single-ported RAM bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH) - memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA)); + memory(.clk(HCLK), .we(memwriteD), /*.bwe(HWSTRB), */ .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA)); endmodule diff --git a/pipelined/src/uncore/uncore.sv b/pipelined/src/uncore/uncore.sv index f4b1202e..0ffa17bf 100644 --- a/pipelined/src/uncore/uncore.sv +++ b/pipelined/src/uncore/uncore.sv @@ -69,13 +69,13 @@ module uncore ( output logic [63:0] MTIME_CLINT ); - logic [`XLEN-1:0] HREADRam, HREADCLINT, HREADPLIC, HREADGPIO, HREADUART, HREADSDC; + logic [`XLEN-1:0] HREADRam, HREADSDC; logic [8:0] HSELRegions; logic HSELRam, HSELCLINT, HSELPLIC, HSELGPIO, HSELUART, HSELSDC; logic HSELEXTD, HSELRamD, HSELCLINTD, HSELPLICD, HSELGPIOD, HSELUARTD, HSELSDCD; - logic HRESPRam, HRESPCLINT, HRESPPLIC, HRESPGPIO, HRESPUART, HRESPSDC; - logic HREADYRam, HREADYCLINT, HREADYPLIC, HREADYGPIO, HREADYUART, HRESPSDCD; + logic HRESPRam, HRESPSDC; + logic HREADYRam, HRESPSDCD; logic [`XLEN-1:0] HREADBootRom; logic HSELBootRom, HSELBootRomD, HRESPBootRom, HREADYBootRom, HREADYSDC; logic HSELNoneD; @@ -120,30 +120,23 @@ module uncore ( .HCLK, .HRESETn, .HSELRam, .HADDR, .HWRITE, .HREADY, .HSIZED, - .HTRANS, .HWDATA, .HREADRam, + .HTRANS, .HWDATA, .HWSTRB, .HREADRam, .HRESPRam, .HREADYRam); end + // *** switch to new RAM if (`BOOTROM_SUPPORTED) begin : bootrom - ram_orig #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE)) + ram #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE)) bootrom( .HCLK, .HRESETn, .HSELRam(HSELBootRom), .HADDR, .HWRITE, .HREADY, .HTRANS, .HSIZED, - .HWDATA, + .HWDATA, .HWSTRB, .HREADRam(HREADBootRom), .HRESPRam(HRESPBootRom), .HREADYRam(HREADYBootRom)); end // memory-mapped I/O peripherals if (`CLINT_SUPPORTED == 1) begin : clint -/* clint clint( - .HCLK, .HRESETn, .TIMECLK, - .HSELCLINT, .HADDR(HADDR[15:0]), .HWRITE, - .HWDATA, .HREADY, .HTRANS, .HSIZED, - .HREADCLINT, - .HRESPCLINT, .HREADYCLINT, - .MTIME(MTIME_CLINT), - .MTimerInt, .MSwInt);*/ clint_apb clint( .PCLK, .PRESETn, .PSEL(PSEL[1]), .PADDR(PADDR[15:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[1]), .PREADY(PREADY[1]), @@ -155,13 +148,6 @@ module uncore ( assign MTimerInt = 0; assign MSwInt = 0; end if (`PLIC_SUPPORTED == 1) begin : plic -/* plic plic( - .HCLK, .HRESETn, - .HSELPLIC, .HADDR(HADDR[27:0]), - .HWRITE, .HREADY, .HTRANS, .HWDATA, - .UARTIntr, .GPIOIntr, - .HREADPLIC, .HRESPPLIC, .HREADYPLIC, - .MExtInt, .SExtInt); */ plic_apb plic( .PCLK, .PRESETn, .PSEL(PSEL[2]), .PADDR(PADDR[27:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[2]), .PREADY(PREADY[2]), @@ -172,17 +158,6 @@ module uncore ( assign SExtInt = 0; end if (`GPIO_SUPPORTED == 1) begin : gpio -/* gpio gpio( - .HCLK, .HRESETn, .HSELGPIO, - .HADDR(HADDR[7:0]), - .HWDATA, - .HWRITE, .HREADY, - .HTRANS, - .HREADGPIO, - .HRESPGPIO, .HREADYGPIO, - .GPIOPinsIn, - .GPIOPinsOut, .GPIOPinsEn, - .GPIOIntr); */ gpio_apb gpio( .PCLK, .PRESETn, .PSEL(PSEL[0]), .PADDR(PADDR[7:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[0]), .PREADY(PREADY[0]), @@ -191,15 +166,6 @@ module uncore ( assign GPIOPinsOut = 0; assign GPIOPinsEn = 0; assign GPIOIntr = 0; end if (`UART_SUPPORTED == 1) begin : uart -/* uart uart( - .HCLK, .HRESETn, - .HSELUART, - .HADDR(HADDR[2:0]), - .HWRITE, .HWDATA, - .HREADUART, .HRESPUART, .HREADYUART, - .SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface - .SOUT(UARTSout), .RTSb(), .DTRb(), // to E1A driver to RS232 interface - .OUT1b(), .OUT2b(), .INTR(UARTIntr), .TXRDYb(), .RXRDYb()); // to CPU */ uart_apb uart( .PCLK, .PRESETn, .PSEL(PSEL[3]), .PADDR(PADDR[2:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, .PRDATA(PRDATA[3]), .PREADY(PREADY[3]), @@ -227,9 +193,6 @@ module uncore ( // AHB Read Multiplexer assign HRDATA = ({`XLEN{HSELRamD}} & HREADRam) | ({`XLEN{HSELEXTD}} & HRDATAEXT) | -// ({`XLEN{HSELCLINTD}} & HREADCLINT) | -// ({`XLEN{HSELPLICD}} & HREADPLIC) | -// ({`XLEN{HSELGPIOD}} & HREADGPIO) | ({`XLEN{HSELBRIDGED}} & HREADBRIDGE) | ({`XLEN{HSELBootRomD}} & HREADBootRom) | // ({`XLEN{HSELUARTD}} & HREADUART) | From 381f3298d81b765465c0863d38798d8e7ba134f5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 8 Jul 2022 09:09:02 +0000 Subject: [PATCH 08/11] Moved HWSTRB to ahblite, factored out of peripherals. Moved old AHB peripherals to unusedsrc --- pipelined/src/ebu/ahblite.sv | 5 +- pipelined/src/uncore/clint.sv | 261 ----------------------- pipelined/src/uncore/gpio.sv | 161 -------------- pipelined/src/uncore/plic.sv | 261 ----------------------- pipelined/src/uncore/plic_apb.sv | 18 -- pipelined/src/uncore/ram.sv | 12 +- pipelined/src/uncore/ram_orig.sv | 107 ---------- pipelined/src/uncore/sdc/SDC.sv | 2 +- pipelined/src/uncore/uart.sv | 107 ---------- pipelined/src/uncore/uncore.sv | 26 +-- pipelined/src/wally/wallypipelinedsoc.sv | 2 +- 11 files changed, 10 insertions(+), 952 deletions(-) delete mode 100644 pipelined/src/uncore/clint.sv delete mode 100644 pipelined/src/uncore/gpio.sv delete mode 100644 pipelined/src/uncore/plic.sv delete mode 100644 pipelined/src/uncore/ram_orig.sv delete mode 100644 pipelined/src/uncore/uart.sv diff --git a/pipelined/src/ebu/ahblite.sv b/pipelined/src/ebu/ahblite.sv index 3744807b..a502bf9f 100644 --- a/pipelined/src/ebu/ahblite.sv +++ b/pipelined/src/ebu/ahblite.sv @@ -157,7 +157,10 @@ module ahblite ( assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise assign HMASTLOCK = 0; // no locking supported assign HWRITE = (NextBusState == MEMWRITE); - assign HWSTRB = ByteMaskM; + //assign HWSTRB = ByteMaskM; + // Byte mask for HWSTRB + swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB)); + // delay write data by one cycle for flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN // delay signals for subword writes diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv deleted file mode 100644 index 33c10459..00000000 --- a/pipelined/src/uncore/clint.sv +++ /dev/null @@ -1,261 +0,0 @@ -/////////////////////////////////////////// -// clint.sv -// -// Written: David_Harris@hmc.edu 14 January 2021 -// Modified: -// -// Purpose: Core-Local Interruptor -// See FE310-G002-Manual-v19p05 for specifications -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// -/* -`include "wally-config.vh" - -module clint ( - input logic HCLK, HRESETn, TIMECLK, - input logic HSELCLINT, - input logic [15:0] HADDR, - input logic [3:0] HSIZED, - input logic HWRITE, - input logic [`XLEN-1:0] HWDATA, - input logic HREADY, - input logic [1:0] HTRANS, - output logic [`XLEN-1:0] HREADCLINT, - output logic HRESPCLINT, HREADYCLINT, - (* mark_debug = "true" *) output logic [63:0] MTIME, - output logic MTimerInt, MSwInt); - - logic MSIP; - - logic [15:0] entry, entryd; - logic memwrite; - logic initTrans; - (* mark_debug = "true" *) logic [63:0] MTIMECMP; - logic [`XLEN/8-1:0] ByteMaskM; - integer i, j; - - assign initTrans = HREADY & HSELCLINT & (HTRANS != 2'b00); - // entryd and memwrite are delayed by a cycle because AHB controller waits a cycle before outputting write data - flopr #(1) memwriteflop(HCLK, ~HRESETn, initTrans & HWRITE, memwrite); - flopr #(16) entrydflop(HCLK, ~HRESETn, entry, entryd); - - assign HRESPCLINT = 0; // OK - assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during asynchronous MTIME accesses - - // word aligned reads - if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000}; - else assign #2 entry = {HADDR[15:2], 2'b00}; - - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(entryd[2:0]), .ByteMask(ByteMaskM)); - - // DH 2/20/21: Eventually allow MTIME to run off a separate clock - // This will require synchronizing MTIME to the system clock - // before it is read or compared to MTIMECMP. - // It will also require synchronizing the write to MTIMECMP. - // Use req and ack signals synchronized across the clock domains. - - // register access - if (`XLEN==64) begin:clint // 64-bit - always @(posedge HCLK) begin - case(entry) - 16'h0000: HREADCLINT <= {63'b0, MSIP}; - 16'h4000: HREADCLINT <= MTIMECMP; - 16'hBFF8: HREADCLINT <= MTIME; - default: HREADCLINT <= 0; - endcase - end - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MSIP <= 0; - MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts - end else if (memwrite) begin - if (entryd == 16'h0000) MSIP <= HWDATA[0]; - if (entryd == 16'h4000) begin - for(i=0;i<`XLEN/8;i++) - if(ByteMaskM[i]) - MTIMECMP[i*8 +: 8] <= HWDATA[i*8 +: 8]; // ***dh: this notation isn't in book yet - maybe from Ross - end - end - -// eventually replace MTIME logic below with timereg -// timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), 1'b0, HWDATA, MTIME, done); - - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MTIME <= 0; - end else if (memwrite & entryd == 16'hBFF8) begin - // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed - for(j=0;j<`XLEN/8;j++) - if(ByteMaskM[j]) - MTIME[j*8 +: 8] <= HWDATA[j*8 +: 8]; - end else MTIME <= MTIME + 1; - end else begin:clint // 32-bit - always @(posedge HCLK) begin - case(entry) - 16'h0000: HREADCLINT <= {31'b0, MSIP}; - 16'h4000: HREADCLINT <= MTIMECMP[31:0]; - 16'h4004: HREADCLINT <= MTIMECMP[63:32]; - 16'hBFF8: HREADCLINT <= MTIME[31:0]; - 16'hBFFC: HREADCLINT <= MTIME[63:32]; - default: HREADCLINT <= 0; - endcase - end - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MSIP <= 0; - MTIMECMP <= 0; - // MTIMECMP is not reset ***? - end else if (memwrite) begin - if (entryd == 16'h0000) MSIP <= HWDATA[0]; - if (entryd == 16'h4000) - for(j=0;j<`XLEN/8;j++) - if(ByteMaskM[j]) - MTIMECMP[j*8 +: 8] <= HWDATA[j*8 +: 8]; - if (entryd == 16'h4004) - for(j=0;j<`XLEN/8;j++) - if(ByteMaskM[j]) - MTIMECMP[32 + j*8 +: 8] <= HWDATA[j*8 +: 8]; - // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed - end - -// eventually replace MTIME logic below with timereg -// timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), memwrite & (entryd == 16'hBFFC), HWDATA, MTIME, done); - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin - MTIME <= 0; - // MTIMECMP is not reset - end else if (memwrite & (entryd == 16'hBFF8)) begin - for(i=0;i<`XLEN/8;i++) - if(ByteMaskM[i]) - MTIME[i*8 +: 8] <= HWDATA[i*8 +: 8]; - end else if (memwrite & (entryd == 16'hBFFC)) begin - // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed - for(i=0;i<`XLEN/8;i++) - if(ByteMaskM[i]) - MTIME[32 + i*8 +: 8]<= HWDATA[i*8 +: 8]; - end else MTIME <= MTIME + 1; - end - - // Software interrupt when MSIP is set - assign MSwInt = MSIP; - // Timer interrupt when MTIME >= MTIMECMP - assign MTimerInt = ({1'b0, MTIME} >= {1'b0, MTIMECMP}); // unsigned comparison - -endmodule - -module timeregsync( - input logic clk, resetn, - input logic we0, we1, - input logic [`XLEN-1:0] wd, - output logic [63:0] q); - - if (`XLEN==64) - always_ff @(posedge clk or negedge resetn) - if (~resetn) q <= 0; - else if (we0) q <= wd; - else q <= q + 1; - else - always_ff @(posedge clk or negedge resetn) - if (~resetn) q <= 0; - else if (we0) q[31:0] <= wd; - else if (we1) q[63:32] <= wd; - else q <= q + 1; -endmodule - -module timereg( - input logic HCLK, HRESETn, TIMECLK, - input logic we0, we1, - input logic [`XLEN-1:0] HWDATA, - output logic [63:0] MTIME, - output logic done); - -// if (`TIMEBASE_SYNC) begin:timereg // use HCLK for MTIME - if (1) begin:timereg // use HCLK for MTIME - timregsync timeregsync(.clk(HCLK), .resetn(HRESETn), .we0, .we1, .wd(HWDATA), .q(MTIME)); - assign done = 1; // immediately completes - end else begin // use asynchronous TIMECLK - // TIME counter runs on TIMECLK but bus interface runs on HCLK - // Need to synchronize reads and writes - // This is subtle because synchronizing a binary counter on a per-bit basis could give a mix of old and new bits - // Instead, we use a Gray coded counter that only changes one bit per cycle - // Synchronizing this for a read is safe because we are guaranteed to get either the old or the new value. - // Writing to the counter requires a request/acknowledge handshake to ensure the write value is held long enough. - // The handshake signals are synchronized in each direction across the interface - // There is no back pressure on instructions, so if multiple counter writes occur *** - - logic req, req_sync, ack, we0_stored, we1_stored, ack_stored, resetn_sync; - logic [`XLEN-1:0] wd_stored; - logic [63:0] time_int, time_int_gc, time_gc, MTIME_GC; - - // When a write enable is asserted for a cycle, sample the enables and data and raise a request until it is acknowledged - // When the acknowledge falls, the transaction is done and the system is ready for another write. - // ***look at redoing this assuming write enable and data are held rather than pulsed. - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) - req <= 0; // don't bother resetting wd - else begin - req <= we0 | we1 | req & ~ack; - we0_stored <= we0; - we1_stored <= we1; - wd_stored <= HWDATA; - ack_stored <= ack; - done <= ack_stored & ~ack; - end - - // synchronize the reset and reqest into the TIMECLK domain - sync resetsync(TIMECLK, HRESETn, resetn_sync); - sync rsync(TIMECLK, req, req_sync); - // synchronize the acknowledge back to the HCLK domain to indicate the request was handled and can be lowered - sync async(HCLK, req_sync, ack); - - timeregsync timeregsync(.clk(TIMECLK), .resetn(resetn_sync), .we0(we0_stored), .we1(we1_stored), .wd(wd_stored), .q(time_int)); - binarytogray b2g(time_int, time_int_gc); - flop gcreg(TIMECLK, time_int_gc, time_gc); - - sync timesync[63:0](HCLK, time_gc, MTIME_GC); - graytobinary g2b(MTIME_GC, MTIME); - end -endmodule - -module binarytogray #(parameter N = `XLEN) ( - input logic [N-1:0] b, - output logic [N-1:0] g); - - // G[N-1] = B[N-1]; G[i] = B[i] ^ B[i+1] for 0 <= i < N-1 - // requires single layer of N-1 XOR gates - assign g = b ^ {1'b0, b[N-1:1]}; -endmodule - -module graytobinary #(parameter N = `XLEN) ( - input logic [N-1:0] g, - output logic [N-1:0] b); - - // B[N-1] = G[N-1]; B[i] = G[i] ^ B[i+1] for 0 <= i < N-1 - // requires rippling through N-1 XOR gates - genvar i; - assign b[N-1] = g[N-1]; - for (i=N-2; i >= 0; i--) begin:g2b - assign b[i] = g[i] ^ b[i+1]; - end -endmodule -*/ \ No newline at end of file diff --git a/pipelined/src/uncore/gpio.sv b/pipelined/src/uncore/gpio.sv deleted file mode 100644 index 30ef0e31..00000000 --- a/pipelined/src/uncore/gpio.sv +++ /dev/null @@ -1,161 +0,0 @@ -/////////////////////////////////////////// -// gpio.sv -// -// Written: David_Harris@hmc.edu 14 January 2021 -// Modified: bbracker@hmc.edu 15 Apr. 2021 -// -// Purpose: General Purpose I/O peripheral -// See FE310-G002-Manual-v19p05 for specifications -// No interrupts, drive strength, or pull-ups supported -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module gpio ( - input logic HCLK, HRESETn, - input logic HSELGPIO, - input logic [7:0] HADDR, - input logic [`XLEN-1:0] HWDATA, - input logic HWRITE, - input logic HREADY, - input logic [1:0] HTRANS, - output logic [`XLEN-1:0] HREADGPIO, - output logic HRESPGPIO, HREADYGPIO, - input logic [31:0] GPIOPinsIn, - output logic [31:0] GPIOPinsOut, GPIOPinsEn, - output logic GPIOIntr); - - logic [31:0] input0d, input1d, input2d, input3d; - logic [31:0] input_val, input_en, output_en, output_val; - logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor; - - logic initTrans, memwrite; - logic [7:0] entry, entryd; - logic [31:0] Din, Dout; - - // AHB I/O - assign entry = {HADDR[7:2],2'b0}; - assign initTrans = HREADY & HSELGPIO & (HTRANS != 2'b00); - // entryd and memwrite are delayed by a cycle because AHB controller waits a cycle before outputting write data - flopr #(1) memwriteflop(HCLK, ~HRESETn, initTrans & HWRITE, memwrite); - flopr #(8) entrydflop(HCLK, ~HRESETn, entry, entryd); - assign HRESPGPIO = 0; // OK - assign HREADYGPIO = 1'b1; // GPIO never takes >1 cycle to respond - - // account for subword read/write circuitry - // -- Note GPIO registers are 32 bits no matter what; access them with LW SW. - // (At least that's what I think when FE310 spec says "only naturally aligned 32-bit accesses are supported") - if (`XLEN == 64) begin - assign Din = entryd[2] ? HWDATA[63:32] : HWDATA[31:0]; - assign HREADGPIO = entryd[2] ? {Dout,32'b0} : {32'b0,Dout}; - end else begin // 32-bit - assign Din = HWDATA[31:0]; - assign HREADGPIO = Dout; - end - - // register access - always_ff @(posedge HCLK, negedge HRESETn) begin - // writes - if (~HRESETn) begin - // asynch reset - input_en <= 0; - output_en <= 0; - // *** synch reset not yet implemented - output_val <= #1 0; - rise_ie <= #1 0; - rise_ip <= #1 0; - fall_ie <= #1 0; - fall_ip <= #1 0; - high_ie <= #1 0; - high_ip <= #1 0; - low_ie <= #1 0; - low_ip <= #1 0; - out_xor <= #1 0; - end else begin - // writes - if (memwrite) - // According to FE310 spec: Once the interrupt is pending, it will remain set until a 1 is written to the *_ip register at that bit. - /* verilator lint_off CASEINCOMPLETE */ - case(entryd) - 8'h04: input_en <= #1 Din; - 8'h08: output_en <= #1 Din; - 8'h0C: output_val <= #1 Din; - 8'h18: rise_ie <= #1 Din; - 8'h20: fall_ie <= #1 Din; - 8'h28: high_ie <= #1 Din; - 8'h30: low_ie <= #1 Din; - 8'h40: out_xor <= #1 Din; - endcase - /* verilator lint_on CASEINCOMPLETE */ - // reads - case(entry) - 8'h00: Dout <= #1 input_val; - 8'h04: Dout <= #1 input_en; - 8'h08: Dout <= #1 output_en; - 8'h0C: Dout <= #1 output_val; - 8'h18: Dout <= #1 rise_ie; - 8'h1C: Dout <= #1 rise_ip; - 8'h20: Dout <= #1 fall_ie; - 8'h24: Dout <= #1 fall_ip; - 8'h28: Dout <= #1 high_ie; - 8'h2C: Dout <= #1 high_ip; - 8'h30: Dout <= #1 low_ie; - 8'h34: Dout <= #1 low_ip; - 8'h40: Dout <= #1 out_xor; - default: Dout <= #1 0; - endcase - // interrupts - if (memwrite & (entryd == 8'h1C)) - rise_ip <= rise_ip & ~Din; - else - rise_ip <= rise_ip | (input2d & ~input3d); - if (memwrite & (entryd == 8'h24)) - fall_ip <= fall_ip & ~Din; - else - fall_ip <= fall_ip | (~input2d & input3d); - if (memwrite & (entryd == 8'h2C)) - high_ip <= high_ip & ~Din; - else - high_ip <= high_ip | input3d; - if (memwrite & (entryd == 8'h34)) - low_ip <= low_ip & ~Din; - else - low_ip <= low_ip | ~input3d; - end - end - - // chip i/o - // connect OUT to IN for loopback testing - if (`GPIO_LOOPBACK_TEST) assign input0d = ((output_en & GPIOPinsOut) | (~output_en & GPIOPinsIn)) & input_en; - else assign input0d = GPIOPinsIn & input_en; - flop #(32) sync1(HCLK,input0d,input1d); - flop #(32) sync2(HCLK,input1d,input2d); - flop #(32) sync3(HCLK,input2d,input3d); - assign input_val = input3d; - assign GPIOPinsOut = output_val ^ out_xor; - assign GPIOPinsEn = output_en; - - assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)}; -endmodule - diff --git a/pipelined/src/uncore/plic.sv b/pipelined/src/uncore/plic.sv deleted file mode 100644 index ef27e5be..00000000 --- a/pipelined/src/uncore/plic.sv +++ /dev/null @@ -1,261 +0,0 @@ -/////////////////////////////////////////// -// plic.sv -// -// Written: bbracker@hmc.edu 18 January 2021 -// Modified: -// -// Purpose: Platform-Level Interrupt Controller -// Based on RISC-V spec (https://github.com/riscv/riscv-plic-spec/blob/master/riscv-plic.adoc) -// With clarifications from ROA's existing implementation (https://roalogic.github.io/plic/docs/AHB-Lite_PLIC_Datasheet.pdf) -// Supports only 1 target core and only a global threshold. -// -// *** Big questions: -// Do we detect requests as level-triggered or edge-trigged? -// If edge-triggered, do we want to allow 1 source to be able to make a number of repeated requests? -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -/* -`include "wally-config.vh" - -`define N `PLIC_NUM_SRC -// number of interrupt sources -// does not include source 0, which does not connect to anything according to spec -// up to 63 sources supported; *** in the future, allow up to 1023 sources - -`define C 2 -// number of conexts -// hardcoded to 2 contexts for now; *** later upgrade to arbitrary (up to 15872) contexts - -module plic ( - input logic HCLK, HRESETn, - input logic HSELPLIC, - input logic [27:0] HADDR, // *** could factor out entryd into HADDRd at the level of uncore - input logic HWRITE, - input logic HREADY, - input logic [1:0] HTRANS, - input logic [`XLEN-1:0] HWDATA, - input logic UARTIntr,GPIOIntr, - output logic [`XLEN-1:0] HREADPLIC, - output logic HRESPPLIC, HREADYPLIC, - (* mark_debug = "true" *) output logic MExtInt, SExtInt); - - logic memwrite, memread, initTrans; - logic [23:0] entry, entryd; - logic [31:0] Din, Dout; - - // context-independent signals - (* mark_debug = "true" *) logic [`N:1] requests; - (* mark_debug = "true" *) logic [`N:1][2:0] intPriority; - (* mark_debug = "true" *) logic [`N:1] intInProgress, intPending, nextIntPending; - - // context-dependent signals - logic [`C-1:0][2:0] intThreshold; - (* mark_debug = "true" *) logic [`C-1:0][`N:1] intEn; - logic [`C-1:0][5:0] intClaim; // ID's are 6 bits if we stay within 63 sources - (* mark_debug = "true" *) logic [`C-1:0][7:1][`N:1] irqMatrix; - logic [`C-1:0][7:1] priorities_with_irqs; - logic [`C-1:0][7:1] max_priority_with_irqs; - logic [`C-1:0][`N:1] irqs_at_max_priority; - logic [`C-1:0][7:1] threshMask; - - // ======= - // AHB I/O - // ======= - assign entry = {HADDR[23:2],2'b0}; - assign initTrans = HREADY & HSELPLIC & (HTRANS != 2'b00); - assign memread = initTrans & ~HWRITE; - // entryd and memwrite are delayed by a cycle because AHB controller waits a cycle before outputting write data - flopr #(1) memwriteflop(HCLK, ~HRESETn, initTrans & HWRITE, memwrite); - flopr #(24) entrydflop(HCLK, ~HRESETn, entry, entryd); - assign HRESPPLIC = 0; // OK - assign HREADYPLIC = 1'b1; // PLIC never takes >1 cycle to respond - - // account for subword read/write circuitry - // -- Note PLIC registers are 32 bits no matter what; access them with LW SW. - if (`XLEN == 64) begin - assign Din = entryd[2] ? HWDATA[63:32] : HWDATA[31:0]; - assign HREADPLIC = entryd[2] ? {Dout,32'b0} : {32'b0,Dout}; - end else begin // 32-bit - assign HREADPLIC = Dout; - assign Din = HWDATA[31:0]; - end - - // ================== - // Register Interface - // ================== - always @(posedge HCLK,negedge HRESETn) begin - // resetting - if (~HRESETn) begin - intPriority <= #1 {`N{3'b0}}; - intEn <= #1 {2{`N'b0}}; - intThreshold <= #1 {2{3'b0}}; - intInProgress <= #1 `N'b0; - // writing - end else begin - if (memwrite) - casez(entryd) - 24'h0000??: intPriority[entryd[7:2]] <= #1 Din[2:0]; - `ifdef PLIC_NUM_SRC_LT_32 // *** switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources - 24'h002000: intEn[0][`N:1] <= #1 Din[`N:1]; - 24'h002080: intEn[1][`N:1] <= #1 Din[`N:1]; - `endif - `ifndef PLIC_NUM_SRC_LT_32 - 24'h002000: intEn[0][31:1] <= #1 Din[31:1]; - 24'h002004: intEn[0][`N:32] <= #1 Din[31:0]; - 24'h002080: intEn[1][31:1] <= #1 Din[31:1]; - 24'h002084: intEn[1][`N:32] <= #1 Din[31:0]; - `endif - 24'h200000: intThreshold[0] <= #1 Din[2:0]; - 24'h200004: intInProgress <= #1 intInProgress & ~(`N'b1 << (Din[5:0]-1)); // lower "InProgress" to signify completion - 24'h201000: intThreshold[1] <= #1 Din[2:0]; - 24'h201004: intInProgress <= #1 intInProgress & ~(`N'b1 << (Din[5:0]-1)); // lower "InProgress" to signify completion - endcase - // reading - if (memread) - casez(entry) - 24'h0000??: Dout <= #1 {29'b0,intPriority[entry[7:2]]}; - `ifdef PLIC_NUM_SRC_LT_32 - 24'h001000: Dout <= #1 {{(31-`N){1'b0}},intPending,1'b0}; - 24'h002000: Dout <= #1 {{(31-`N){1'b0}},intEn[0],1'b0}; - 24'h002080: Dout <= #1 {{(31-`N){1'b0}},intEn[1],1'b0}; - `endif - `ifndef PLIC_NUM_SRC_LT_32 - 24'h001000: Dout <= #1 {intPending[31:1],1'b0}; - 24'h001004: Dout <= #1 {{(63-`N){1'b0}},intPending[`N:32]}; - 24'h002000: Dout <= #1 {intEn[0][31:1],1'b0}; - 24'h002004: Dout <= #1 {{(63-`N){1'b0}},intEn[0][`N:32]}; - 24'h002080: Dout <= #1 {intEn[0][31:1],1'b0}; - 24'h002084: Dout <= #1 {{(63-`N){1'b0}},intEn[1][`N:32]}; - `endif - 24'h200000: Dout <= #1 {29'b0,intThreshold[0]}; - 24'h200004: begin - Dout <= #1 {26'b0,intClaim[0]}; - intInProgress <= #1 intInProgress | (`N'b1 << (intClaim[0]-1)); // claimed requests are currently in progress of being serviced until they are completed - end - 24'h201000: Dout <= #1 {29'b0,intThreshold[1]}; - 24'h201004: begin - Dout <= #1 {26'b0,intClaim[1]}; - intInProgress <= #1 intInProgress | (`N'b1 << (intClaim[1]-1)); // claimed requests are currently in progress of being serviced until they are completed - end - default: Dout <= #1 32'h0; // invalid access - endcase - else - Dout <= #1 32'h0; - end - end - - // connect sources to requests - always_comb begin - requests = `N'b0; - `ifdef PLIC_GPIO_ID - requests[`PLIC_GPIO_ID] = GPIOIntr; - `endif - `ifdef PLIC_UART_ID - requests[`PLIC_UART_ID] = UARTIntr; - `endif - end - - // pending interrupt requests - //assign nextIntPending = (intPending | requests) & ~intInProgress; // - assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion - flopr #(`N) intPendingFlop(HCLK,~HRESETn,nextIntPending,intPending); - - // context-dependent signals - genvar ctx; - for (ctx=0; ctx<`C; ctx++) begin - // request matrix - // priority level (rows) X source ID (columns) - // - // irqMatrix[ctx][pri][src] is high if source - // has priority level and has an "active" interrupt request - // ("active" meaning it is enabled in context and is pending) - genvar src, pri; - for (pri=1; pri<=7; pri++) begin - for (src=1; src<=`N; src++) begin - assign irqMatrix[ctx][pri][src] = (intPriority[src]==pri) & intPending[src] & intEn[ctx][src]; - end - end - - // which prority levels have one or more active requests? - assign priorities_with_irqs[ctx][7:1] = { - |irqMatrix[ctx][7], - |irqMatrix[ctx][6], - |irqMatrix[ctx][5], - |irqMatrix[ctx][4], - |irqMatrix[ctx][3], - |irqMatrix[ctx][2], - |irqMatrix[ctx][1] - }; - - // get the highest priority level that has active requests - assign max_priority_with_irqs[ctx][7:1] = { - priorities_with_irqs[ctx][7], - priorities_with_irqs[ctx][6] & ~|priorities_with_irqs[ctx][7], - priorities_with_irqs[ctx][5] & ~|priorities_with_irqs[ctx][7:6], - priorities_with_irqs[ctx][4] & ~|priorities_with_irqs[ctx][7:5], - priorities_with_irqs[ctx][3] & ~|priorities_with_irqs[ctx][7:4], - priorities_with_irqs[ctx][2] & ~|priorities_with_irqs[ctx][7:3], - priorities_with_irqs[ctx][1] & ~|priorities_with_irqs[ctx][7:2] - }; - - // of the sources at the highest priority level that has active requests, - // which sources have active requests? - assign irqs_at_max_priority[ctx][`N:1] = - ({`N{max_priority_with_irqs[ctx][7]}} & irqMatrix[ctx][7]) | - ({`N{max_priority_with_irqs[ctx][6]}} & irqMatrix[ctx][6]) | - ({`N{max_priority_with_irqs[ctx][5]}} & irqMatrix[ctx][5]) | - ({`N{max_priority_with_irqs[ctx][4]}} & irqMatrix[ctx][4]) | - ({`N{max_priority_with_irqs[ctx][3]}} & irqMatrix[ctx][3]) | - ({`N{max_priority_with_irqs[ctx][2]}} & irqMatrix[ctx][2]) | - ({`N{max_priority_with_irqs[ctx][1]}} & irqMatrix[ctx][1]); - - // of the sources at the highest priority level that has active requests, - // choose the source with the lowest source ID to be the most urgent - // and set intClaim to the source ID of the most urgent active request - integer k; - always_comb begin - intClaim[ctx] = 6'b0; - for (k=`N; k>0; k--) begin - if (irqs_at_max_priority[ctx][k]) intClaim[ctx] = k[5:0]; - end - end - - // create threshold mask - always_comb begin - threshMask[ctx][7] = (intThreshold[ctx] != 7); - threshMask[ctx][6] = (intThreshold[ctx] != 6) & threshMask[ctx][7]; - threshMask[ctx][5] = (intThreshold[ctx] != 5) & threshMask[ctx][6]; - threshMask[ctx][4] = (intThreshold[ctx] != 4) & threshMask[ctx][5]; - threshMask[ctx][3] = (intThreshold[ctx] != 3) & threshMask[ctx][4]; - threshMask[ctx][2] = (intThreshold[ctx] != 2) & threshMask[ctx][3]; - threshMask[ctx][1] = (intThreshold[ctx] != 1) & threshMask[ctx][2]; - end - end - // is the max priority > threshold? - // *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold? - assign MExtInt = |(threshMask[0] & priorities_with_irqs[0]); - assign SExtInt = |(threshMask[1] & priorities_with_irqs[1]); -endmodule - -*/ \ No newline at end of file diff --git a/pipelined/src/uncore/plic_apb.sv b/pipelined/src/uncore/plic_apb.sv index 90487baf..f83033c4 100644 --- a/pipelined/src/uncore/plic_apb.sv +++ b/pipelined/src/uncore/plic_apb.sv @@ -56,16 +56,6 @@ module plic_apb ( input logic PENABLE, output logic [`XLEN-1:0] PRDATA, output logic PREADY, -/* - input logic PCLK, PRESETn, - input logic HSELPLIC, - input logic [27:0] HADDR, // *** could factor out entry into HADDRd at the level of uncore - input logic HWRITE, - input logic HREADY, - input logic [1:0] HTRANS, - input logic [`XLEN-1:0] HWDATA, - output logic [`XLEN-1:0] PRDATA, - output logic HRESPPLIC, HREADYPLIC, */ input logic UARTIntr,GPIOIntr, (* mark_debug = "true" *) output logic MExtInt, SExtInt); @@ -96,14 +86,6 @@ module plic_apb ( assign memread = ~PWRITE & PSEL; // read at start of access phase. PENABLE hasn't set up before this assign PREADY = 1'b1; // PLIC never takes >1 cycle to respond assign entry = {PADDR[23:2],2'b0}; - /* - assign initTrans = HREADY & HSELPLIC & (HTRANS != 2'b00); - assign memread = initTrans & ~HWRITE; - // entryd and memwrite are delayed by a cycle because AHB controller waits a cycle before outputting write data - flopr #(1) memwriteflop(PCLK, ~HRESETn, initTrans & HWRITE, memwrite); - flopr #(24) entrydflop(PCLK, ~PRESETn, entry, entryd); - assign HRESPPLIC = 0; // OK - assign HREADYPLIC = 1'b1; // PLIC never takes >1 cycle to respond */ // account for subword read/write circuitry // -- Note PLIC registers are 32 bits no matter what; access them with LW SW. diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index 39094142..b850321e 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -39,7 +39,6 @@ module ram #(parameter BASE=0, RANGE = 65535) ( input logic [1:0] HTRANS, input logic [`XLEN-1:0] HWDATA, input logic [`XLEN/8-1:0] HWSTRB, - input logic [3:0] HSIZED, output logic [`XLEN-1:0] HREADRam, output logic HRESPRam, HREADYRam ); @@ -70,17 +69,8 @@ module ram #(parameter BASE=0, RANGE = 65535) ( // On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr); - // Byte mask for subword writes - // ***the CLINT and other peripherals duplicate this hardware - // *** it shoudl be centralized and sent over HWSTRB - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask)); - - always @(posedge HCLK) begin - assert (ByteMask == HWSTRB | ~memwriteD) else $display("HSIZED %b HADDRD %b ByteMask %b HWSTRB %b\n", HSIZED[1:0], HADDRD[2:0], ByteMask, HWSTRB); - end - // single-ported RAM bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH) - memory(.clk(HCLK), .we(memwriteD), /*.bwe(HWSTRB), */ .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA)); + memory(.clk(HCLK), .we(memwriteD), .bwe(HWSTRB), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA)); endmodule diff --git a/pipelined/src/uncore/ram_orig.sv b/pipelined/src/uncore/ram_orig.sv deleted file mode 100644 index bc852cf0..00000000 --- a/pipelined/src/uncore/ram_orig.sv +++ /dev/null @@ -1,107 +0,0 @@ -/////////////////////////////////////////// -// ram_orig.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: On-chip RAM, external to core -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module ram_orig #(parameter BASE=0, RANGE = 65535) ( - input logic HCLK, HRESETn, - input logic HSELRam, - input logic [31:0] HADDR, - input logic HWRITE, - input logic HREADY, - input logic [1:0] HTRANS, - input logic [`XLEN-1:0] HWDATA, - input logic [3:0] HSIZED, - output logic [`XLEN-1:0] HREADRam, - output logic HRESPRam, HREADYRam -); - - // Desired changes. - // 1. find a way to merge read and write address into 1 port. - // 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.) - // 3. implement burst. - // 4. remove the configurable latency. - - logic [`XLEN/8-1:0] ByteMaskM; - logic [31:0] HWADDR, A; - logic prevHREADYRam, risingHREADYRam; - logic initTrans; - logic memwrite; - logic [3:0] busycount; - - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HWADDR[2:0]), .ByteMask(ByteMaskM)); - - assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); - - // *** this seems like a weird way to use reset - flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite); - flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A); - - // busy FSM to extend READY signal - always @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin - busycount <= 0; - HREADYRam <= #1 0; - end else begin - if (initTrans) begin - busycount <= 0; - HREADYRam <= #1 0; - end else if (~HREADYRam) begin - if (busycount == 0) begin // Ram latency, for testing purposes. *** test with different values such as 2 - HREADYRam <= #1 1; - end else begin - busycount <= busycount + 1; - end - end - end - assign HRESPRam = 0; // OK - - localparam ADDR_WDITH = $clog2(RANGE/8); - localparam OFFSET = $clog2(`XLEN/8); - - // Rising HREADY edge detector - // Indicates when ram is finishing up - // Needed because HREADY may go high for other reasons, - // and we only want to write data when finishing up. - flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam); - assign risingHREADYRam = HREADYRam & ~prevHREADYRam; - - always @(posedge HCLK) - HWADDR <= #1 A; - - bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA) - memory(.clk(HCLK), .reA(1'b1), - .addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam), - .weB(memwrite & risingHREADYRam), .bweB(ByteMaskM), - .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); - - -endmodule - diff --git a/pipelined/src/uncore/sdc/SDC.sv b/pipelined/src/uncore/sdc/SDC.sv index 2aa548d8..0c34e184 100644 --- a/pipelined/src/uncore/sdc/SDC.sv +++ b/pipelined/src/uncore/sdc/SDC.sv @@ -144,7 +144,7 @@ module SDC // currently does not support writes - assign InitTrans = HREADY & HSELSDC & (HTRANS != 2'b00); + assign InitTrans = HREADY & HSELSDC & HTRANS[1]; //assign RegRead = InitTrans & ~HWRITE; // register resolve combo loop flopr #(1) RegReadReg(HCLK, ~HRESETn, InitTrans & ~HWRITE, RegRead); diff --git a/pipelined/src/uncore/uart.sv b/pipelined/src/uncore/uart.sv deleted file mode 100644 index ed062035..00000000 --- a/pipelined/src/uncore/uart.sv +++ /dev/null @@ -1,107 +0,0 @@ -/////////////////////////////////////////// -// uart.sv -// -// Written: David_Harris@hmc.edu 21 January 2021 -// Modified: -// -// Purpose: Interface to Universial Asynchronous Receiver/ Transmitter with FIFOs -// Emulates interface of Texas Instruments PC165550D -// Compatible with UART in Imperas Virtio model *** -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -/* -`include "wally-config.vh" - -module uart ( - input logic HCLK, HRESETn, - input logic HSELUART, - input logic [2:0] HADDR, - input logic HWRITE, - input logic [`XLEN-1:0] HWDATA, - output logic [`XLEN-1:0] HREADUART, - output logic HRESPUART, HREADYUART, - (* mark_debug = "true" *) input logic SIN, DSRb, DCDb, CTSb, RIb, // from E1A driver from RS232 interface - (* mark_debug = "true" *) output logic SOUT, RTSb, DTRb, // to E1A driver to RS232 interface - (* mark_debug = "true" *) output logic OUT1b, OUT2b, INTR, TXRDYb, RXRDYb); // to CPU - - // UART interface signals - logic [2:0] A; - logic MEMRb, MEMWb, memread, memwrite; - logic [7:0] Din, Dout; - - // rename processor interface signals to match PC16550D and provide one-byte interface - flopr #(1) memreadreg(HCLK, ~HRESETn, (HSELUART & ~HWRITE), memread); - flopr #(1) memwritereg(HCLK, ~HRESETn, (HSELUART & HWRITE), memwrite); - flopr #(3) haddrreg(HCLK, ~HRESETn, HADDR[2:0], A); - assign MEMRb = ~memread; - assign MEMWb = ~memwrite; - - assign HRESPUART = 0; // OK - assign HREADYUART = 1; // should idle high during address phase and respond high when done; will need to be modified if UART ever needs more than 1 cycle to do something - - if (`XLEN == 64) begin:uart - always_comb begin - HREADUART = {Dout, Dout, Dout, Dout, Dout, Dout, Dout, Dout}; - case (A) - 3'b000: Din = HWDATA[7:0]; - 3'b001: Din = HWDATA[15:8]; - 3'b010: Din = HWDATA[23:16]; - 3'b011: Din = HWDATA[31:24]; - 3'b100: Din = HWDATA[39:32]; - 3'b101: Din = HWDATA[47:40]; - 3'b110: Din = HWDATA[55:48]; - 3'b111: Din = HWDATA[63:56]; - endcase - end - end else begin:uart // 32-bit - always_comb begin - HREADUART = {Dout, Dout, Dout, Dout}; - case (A[1:0]) - 2'b00: Din = HWDATA[7:0]; - 2'b01: Din = HWDATA[15:8]; - 2'b10: Din = HWDATA[23:16]; - 2'b11: Din = HWDATA[31:24]; - endcase - end - end - - logic BAUDOUTb; // loop tx clock BAUDOUTb back to rx clock RCLK - // *** make sure reads don't occur on UART unless fully selected because they could change state. This applies to all peripherals - uartPC16550D u( - // Processor Interface - .HCLK, .HRESETn, - .A, .Din, - .Dout, - .MEMRb, .MEMWb, - .INTR, .TXRDYb, .RXRDYb, - // Clocks - .BAUDOUTb, .RCLK(BAUDOUTb), - // E1A Driver - .SIN, .DSRb, .DCDb, .CTSb, .RIb, - .SOUT, .RTSb, .DTRb, .OUT1b, .OUT2b -); - -endmodule - -*/ \ No newline at end of file diff --git a/pipelined/src/uncore/uncore.sv b/pipelined/src/uncore/uncore.sv index 0ffa17bf..888489b9 100644 --- a/pipelined/src/uncore/uncore.sv +++ b/pipelined/src/uncore/uncore.sv @@ -53,7 +53,6 @@ module uncore ( output logic HSELEXT, // delayed signals input logic [2:0] HADDRD, - input logic [3:0] HSIZED, input logic HWRITED, // peripheral pins output logic MTimerInt, MSwInt, MExtInt, SExtInt, @@ -83,21 +82,14 @@ module uncore ( logic SDCIntM; logic PCLK, PRESETn, PWRITE, PENABLE; -// logic PSEL, PREADY; logic [3:0] PSEL, PREADY; logic [31:0] PADDR; logic [`XLEN-1:0] PWDATA; logic [`XLEN/8-1:0] PSTRB; logic [3:0][`XLEN-1:0] PRDATA; -// logic [`XLEN-1:0][8:0] PRDATA; logic [`XLEN-1:0] HREADBRIDGE; logic HRESPBRIDGE, HREADYBRIDGE, HSELBRIDGE, HSELBRIDGED; - // *** to do: - // hook up HWSTRB and remove subword write decoders - // add other peripherals on AHB - // HTRANS encoding - // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders // Set access types to all 1 as don't cares because the MMU has already done access checking @@ -119,7 +111,7 @@ module uncore ( .BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( .HCLK, .HRESETn, .HSELRam, .HADDR, - .HWRITE, .HREADY, .HSIZED, + .HWRITE, .HREADY, .HTRANS, .HWDATA, .HWSTRB, .HREADRam, .HRESPRam, .HREADYRam); end @@ -130,7 +122,7 @@ module uncore ( bootrom( .HCLK, .HRESETn, .HSELRam(HSELBootRom), .HADDR, - .HWRITE, .HREADY, .HTRANS, .HSIZED, + .HWRITE, .HREADY, .HTRANS, .HWDATA, .HWSTRB, .HREADRam(HREADBootRom), .HRESPRam(HRESPBootRom), .HREADYRam(HREADYBootRom)); end @@ -189,38 +181,26 @@ module uncore ( assign SDCCmdOE = 0; end - // mux could also include external memory // AHB Read Multiplexer assign HRDATA = ({`XLEN{HSELRamD}} & HREADRam) | - ({`XLEN{HSELEXTD}} & HRDATAEXT) | + ({`XLEN{HSELEXTD}} & HRDATAEXT) | ({`XLEN{HSELBRIDGED}} & HREADBRIDGE) | ({`XLEN{HSELBootRomD}} & HREADBootRom) | -// ({`XLEN{HSELUARTD}} & HREADUART) | ({`XLEN{HSELSDCD}} & HREADSDC); assign HRESP = HSELRamD & HRESPRam | HSELEXTD & HRESPEXT | -// HSELCLINTD & HRESPCLINT | -// HSELPLICD & HRESPPLIC | -// HSELGPIOD & HRESPGPIO | HSELBRIDGE & HRESPBRIDGE | HSELBootRomD & HRESPBootRom | -// HSELUARTD & HRESPUART | HSELSDC & HRESPSDC; assign HREADY = HSELRamD & HREADYRam | HSELEXTD & HREADYEXT | -// HSELCLINTD & HREADYCLINT | -// HSELPLICD & HREADYPLIC | -// HSELGPIOD & HREADYGPIO | HSELBRIDGED & HREADYBRIDGE | HSELBootRomD & HREADYBootRom | -// HSELUARTD & HREADYUART | HSELSDCD & HREADYSDC | HSELNoneD; // don't lock up the bus if no region is being accessed - // *** remove HREADYGPIO, others that are now unused - // Address Decoder Delay (figure 4-2 in spec) flopr #(9) hseldelayreg(HCLK, ~HRESETn, HSELRegions, {HSELNoneD, HSELEXTD, HSELBootRomD, HSELRamD, HSELCLINTD, HSELGPIOD, HSELUARTD, HSELPLICD, HSELSDCD}); flopr #(1) hselbridgedelayreg(HCLK, ~HRESETn, HSELBRIDGE, HSELBRIDGED); diff --git a/pipelined/src/wally/wallypipelinedsoc.sv b/pipelined/src/wally/wallypipelinedsoc.sv index ead40364..ff1d9500 100644 --- a/pipelined/src/wally/wallypipelinedsoc.sv +++ b/pipelined/src/wally/wallypipelinedsoc.sv @@ -95,7 +95,7 @@ module wallypipelinedsoc ( uncore uncore(.HCLK, .HRESETn, .TIMECLK, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HRDATAEXT, - .HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HADDRD, .HSIZED, .HWRITED, + .HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HADDRD, .HWRITED, .MTimerInt, .MSwInt, .MExtInt, .SExtInt, .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, .UARTSin, .UARTSout, .MTIME_CLINT, .HSELEXT, .SDCCmdOut, .SDCCmdOE, .SDCCmdIn, .SDCDatIn, .SDCCLK From d10ad0e8832303907bb8bff1e784eaff2e7a8a4b Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 8 Jul 2022 09:17:31 +0000 Subject: [PATCH 09/11] Removed testbench code that ignores mismatch on zero signatures --- addins/riscv-arch-test | 2 +- pipelined/testbench/testbench.sv | 20 ++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b2..be67c99b 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index c248a750..568f2722 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -284,20 +284,12 @@ logic [3:0] dummy; if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i]; else sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i]; //$display("signature[%h] = %h sig = %h", i, signature[i], sig); - if (signature[i] !== sig & - //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] & - (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1? - if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin - // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin - // report errors unless they are garbage at the end of the sim - // kind of hacky test for garbage right now - errors = errors+1; - $display(" Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", - tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]); - // tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]); - $stop;//***debug - end - end + if (signature[i] !== sig & (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin + errors = errors+1; + $display(" Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", + tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]); + $stop;//***debug + end i = i + 1; end /* verilator lint_on INFINITELOOP */ From 43549b10fbc12261ad5007b1bc9c117038666803 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 8 Jul 2022 02:27:16 -0700 Subject: [PATCH 10/11] Fixed error in gpio test --- .../riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index 4a19fffa..1b3bbdb4 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -151,7 +151,7 @@ SETUP_PLIC .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte fall_ie, 0x01000000, write32_test # enable high interrupt on bit 24, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised -.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 24, which is pending +.4byte fall_ie, 0x00000000, write32_test # disable high interrupt on bit 24, which is pending .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests From c5dfefe6690583f32f2843d2c382bb5fcd761851 Mon Sep 17 00:00:00 2001 From: James Stine Date: Fri, 8 Jul 2022 08:09:55 -0500 Subject: [PATCH 11/11] Update SRAM to /proj/wally --- pipelined/src/cache/sram1p1rw.sv | 6 +++--- .../src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pipelined/src/cache/sram1p1rw.sv b/pipelined/src/cache/sram1p1rw.sv index 134a14b0..1b853702 100644 --- a/pipelined/src/cache/sram1p1rw.sv +++ b/pipelined/src/cache/sram1p1rw.sv @@ -57,9 +57,9 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( for (index=0; index < WIDTH; index++) assign BitWriteMask[index] = ByteMask[index/8]; TS1N28HPCPSVTB64X128M4SWBASO sram( - .SLP(1'b0), .SD(1'b0), .CLK(clk), .CEB(1'b0), .WEB(~WriteEnable), - .CEBM(1'b0), .WEBM(1'b0), .AWT(1'b0), .A(Adr), .D(CacheWriteData), - .BWEB(~BitWriteMask), .AM('b0), .DM('b0), .BWEBM('b0), .BIST(1'b0), .Q(ReadData) + .CLK(clk), .CEB(1'b0), .WEB(~WriteEnable), + .A(Adr), .D(CacheWriteData), + .BWEB(~BitWriteMask), .Q(ReadData) ); end else begin diff --git a/pipelined/src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v b/pipelined/src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v index 71972067..c8197520 120000 --- a/pipelined/src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v +++ b/pipelined/src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v @@ -1 +1 @@ -/home/jstine/memory/ts1n28hpcpsvtb64x128m4swbaso_180a/VERILOG/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v \ No newline at end of file +/proj/wally/memory/ts1n28hpcpsvtb64x128m4sw_180a/VERILOG/ts1n28hpcpsvtb64x128m4sw_180a_tt1v25c.v \ No newline at end of file