diff --git a/addins/embench-iot b/addins/embench-iot index 2d2aaa7b8..261a65e0a 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 2d2aaa7b85c60219c591555b647dfa1785ffe1b3 +Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9 diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index effd553a6..307c77b26 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit effd553a6a91ed9b0ba251796a8a44505a45174f +Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 diff --git a/addins/riscv-dv b/addins/riscv-dv index cb4295f9c..a7e27bc04 160000 --- a/addins/riscv-dv +++ b/addins/riscv-dv @@ -1 +1 @@ -Subproject commit cb4295f9ce5da2881d7746015a6105adb8f09071 +Subproject commit a7e27bc046405f0dbcde091be99f5a5d564e2172 diff --git a/addins/riscv-tests b/addins/riscv-tests index 3e2bf06b0..cf04274f5 160000 --- a/addins/riscv-tests +++ b/addins/riscv-tests @@ -1 +1 @@ -Subproject commit 3e2bf06b071a77ae62c09bf07c5229d1f9397d94 +Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7 diff --git a/examples/C/fir/fir.S b/examples/C/fir/fir.S new file mode 100644 index 000000000..a96339ba7 --- /dev/null +++ b/examples/C/fir/fir.S @@ -0,0 +1,35 @@ +// fir.s +// mmasserfrye@hmc.edu 30 January 2022 +// FIR filter + +// a0 = N, a1 = M, a2 = &X, a3 = &c, a4 = &Y + +.global fir + +fir: + li t0, 0 # n = 0 = t0 + slli t6, a0, 3 # N*8 + slli t5, a1, 3 # M*8 + addi t4, t5, -8 # (M-1)*8 +for1: + bge t0, t6, end # exit outer for if n >= N + fmv.d.x f3, zero # sum = 0 = f3 + li t2, 0 # i = 0 = t2 + add t1, t4, t0 # [(M-1) + n]*8 +for2: + bge t2, t5, for1end # exit inner for if i >= M + sub t3, t1, t2 # [(M-1) + n - i]*8 + add t3, t3, a2 # t3 = offset + &X + fld f0, 0(t3) # X[n-i+(M-1)] + add t3, t2, a3 # t3 = offset + &c + fld f1, 0(t3) # c[i] + fmadd.d f3, f0, f1, f3 # sum += c[i]*X[n-i+(M-1)] + addi t2, t2, 8 # i++ + j for2 +for1end: + add t3, t0, a4 # t3 = offset + &Y + fsd f3, 0(t3) # Y[n] = sum + 
addi t0, t0, 8 # n++ + j for1 +end: + ret \ No newline at end of file diff --git a/examples/C/lab1matrix/Makefile b/examples/C/lab1matrix/Makefile new file mode 100644 index 000000000..18c011f4f --- /dev/null +++ b/examples/C/lab1matrix/Makefile @@ -0,0 +1,33 @@ +TARGET = matMult + +$(TARGET).objdump: $(TARGET) + riscv64-unknown-elf-objdump -S -D $(TARGET) > $(TARGET).objdump + spike $(TARGET) + +$(TARGET): $(TARGET).c Makefile + riscv64-unknown-elf-gcc -o $(TARGET) -g -O\ + -march=rv64gc -mabi=lp64d -mcmodel=medany \ + -nostdlib -static -lm -fno-tree-loop-distribute-patterns \ + -T../common/test.ld -I../common \ + $(TARGET).c ../common/crt.S ../common/syscalls.c +# Compiler flags: +# -o $(TARGET) defines the name of the output file +# -g generates debugging symbols for gdb +# -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization +# -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits +# -static forces static linking (no dynamic shared libraries on bare metal) +# -lm links the math library if necessary (when #include math.h) +# -nostdlib avoids inserting standard startup files and default libraries +# because we are using crt.S on bare metal +# -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library +# -T specifies the linker file +# -I specifies the include path (e.g. for util.h) +# The last line defines the C files to compile. 
+# crt.S is needed as our startup file to initialize the processor +# syscalls.c implements printf through the HTIF for Spike +# other flags from riscv-tests makefiles that don't seem to be important +# -ffast-math -DPREALLOCATE=1 -std=gnu99 \ +# -fno-common -fno-builtin-printf -nostartfiles -lgcc \ + +clean: + rm -f $(TARGET) $(TARGET).objdump diff --git a/examples/C/lab1matrix/matMult b/examples/C/lab1matrix/matMult new file mode 100755 index 000000000..e3d3b25dd Binary files /dev/null and b/examples/C/lab1matrix/matMult differ diff --git a/examples/C/lab1matrix/matMult.c b/examples/C/lab1matrix/matMult.c new file mode 100644 index 000000000..b794819f9 --- /dev/null +++ b/examples/C/lab1matrix/matMult.c @@ -0,0 +1,87 @@ +// matMult.c +// mmasserfrye@hmc.edu 30 January 2022 + +#include // supports printf +#include // supports fabs +#include "util.h" // supports verify + +// puts the indicated row of length n from matrix mat into array arr +void getRow(int n, int row, double *mat, double *arr){ + int ind; + for (int i=0; i>(1+`XLEN/32); + localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32); + + initial + begin + test = 1; + totalerrors = 0; + testadr = 0; + // fill memory with defined values to reduce Xs in simulation + // Quick note the memory will need to be initialized. The C library does not + // guarantee the initialized reads. For example a strcmp can read 6 byte + // strings, but uses a load double to read them in. If the last 2 bytes are + // not initialized the compare results in an 'x' which propagates through + // the design. 
+ if (TEST == "coremark") + for (i=MemStartAddr; i= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); + assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF"); + assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); + assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & 
`U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); + assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); +// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM"); + assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); + assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); + end +endmodule + + +/* verilator lint_on STMTDLY */ +/* verilator lint_on WIDTH */ + +module DCacheFlushFSM + (input logic clk, + input logic reset, + input logic start, + output logic done); + + genvar adr; + + logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)]; + + if(`DMEM == `MEM_CACHE) begin + localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; + localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; + localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; + localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; + localparam integer lognumlines = $clog2(numlines); + localparam integer loglinebytelen = $clog2(linebytelen); + localparam integer lognumways = $clog2(numways); + localparam integer tagstart = lognumlines + loglinebytelen; + + + + genvar index, way, cacheWord; + logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0]; + logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0]; + logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; + logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; + logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; + for(index = 0; index < numlines; index++) begin + for(way = 0; way < numways; way++) begin + for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin + copyShadow #(.tagstart(tagstart), + 
.loglinebytelen(loglinebytelen)) + copyShadow(.clk, + .start, + .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), + .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), + .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), + .index(index), + .cacheWord(cacheWord), + .CacheData(CacheData[way][index][cacheWord]), + .CacheAdr(CacheAdr[way][index][cacheWord]), + .CacheTag(CacheTag[way][index][cacheWord]), + .CacheValid(CacheValid[way][index][cacheWord]), + .CacheDirty(CacheDirty[way][index][cacheWord])); + end + end + end + + integer i, j, k; + + always @(posedge clk) begin + if (start) begin #1 + #1 + for(i = 0; i < numlines; i++) begin + for(j = 0; j < numways; j++) begin + for(k = 0; k < numwords; k++) begin + if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin + ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; + end + end + end + end + end + end + + + end + flop #(1) doneReg(.clk, .d(start), .q(done)); +endmodule + +module copyShadow + #(parameter tagstart, loglinebytelen) + (input logic clk, + input logic start, + input logic [`PA_BITS-1:tagstart] tag, + input logic valid, dirty, + input logic [`XLEN-1:0] data, + input logic [32-1:0] index, + input logic [32-1:0] cacheWord, + output logic [`XLEN-1:0] CacheData, + output logic [`PA_BITS-1:0] CacheAdr, + output logic [`XLEN-1:0] CacheTag, + output logic CacheValid, + output logic CacheDirty); + + + always_ff @(posedge clk) begin + if(start) begin + CacheTag = tag; + CacheValid = valid; + CacheDirty = dirty; + CacheData = data; + CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); + end + end + +endmodule + diff --git a/synthDC/madzscript.py b/synthDC/madzscript.py index 83295df19..ba8c97b1e 100755 --- a/synthDC/madzscript.py +++ 
b/synthDC/madzscript.py @@ -1,36 +1,59 @@ #!/usr/bin/python3 # from msilib.schema import File import subprocess +from multiprocessing import Pool +import csv +import re +def run_command(module, width, freq): + command = "make synth DESIGN=ppa_{}_{} TECH=sky90 DRIVE=INV FREQ={} MAXOPT=1".format(module, width, freq) + subprocess.Popen(command, shell=True) -bashCommand = "find . | grep ppa_timing.rep" -output = subprocess.check_output(['bash','-c', bashCommand]) -files = output.decode("utf-8").split('\n') -print(files) +widths = ['16', '32', '64'] +modules = ['mult'] +freqs = ['10', '4000', '5000', '6000'] -widths = [] -areas = [] -delays = [] +LoT = [] +for module in modules: + for width in widths: + for freq in freqs: + LoT += [[module, width, freq]] -for file in files: - widths += [pullNum('ports', file)/3] - areas += [pullNum('Total cell area', file)] - delays += [pullNum('delay', file)] +pool = Pool() +pool.starmap(run_command, LoT) +bashCommand = "grep 'Critical Path Length' runs/ppa_*/reports/*qor*" +outputCPL = subprocess.check_output(['bash','-c', bashCommand]) +linesCPL = outputCPL.decode("utf-8").split('\n')[:-1] -def pullNum(keyText, file): - return +bashCommand = "grep 'Design Area' runs/ppa_*/reports/*qor*" +outputDA = subprocess.check_output(['bash','-c', bashCommand]) +linesDA = outputDA.decode("utf-8").split('\n')[:-1] -# File_object = open("greppedareas","r") -# content = File_object.readlines() -# File_object.close() +cpl = re.compile('\d{1}\.\d{6}') +f = re.compile('_\d*_MHz') +wm = re.compile('ppa_\w*_\d*_qor') +da = re.compile('\d*\.\d{6}') -# LoT = [] -# for line in content: -# l = line.split(':') -# LoT += [float(l[2])] +allSynths = [] -# avg = sum(LoT)/len(LoT) +for i in range(len(linesCPL)): + line = linesCPL[i] + oneSynth = [] + mwm = wm.findall(line)[0][4:-4].split('_') + oneSynth += [mwm[0]] + oneSynth += [mwm[1]] + oneSynth += [f.findall(line)[0][1:-4]] + oneSynth += cpl.findall(line) + oneSynth += da.findall(linesDA[i]) + allSynths += 
[oneSynth] -# print(avg) \ No newline at end of file +file = open("ppaData.csv", "w") +writer = csv.writer(file) +writer.writerow(['Module', 'Width', 'Target Freq', 'Delay', 'Area']) + +for one in allSynths: + writer.writerow(one) + +file.close() \ No newline at end of file diff --git a/synthDC/ppa b/synthDC/ppa index 5929ca956..dcf3d15d2 100755 --- a/synthDC/ppa +++ b/synthDC/ppa @@ -3,7 +3,7 @@ # Run PPA experiments on different modules rm -rf runs/ppa* make synth DESIGN=ppa_add_16 TECH=sky90 DRIVE=INV FREQ=10 MAXOPT=1 & -make synth DESIGN=ppa_add_32 TECH=sky90 DRIVE=INV FREQ=1 MAXOPT=10 & +make synth DESIGN=ppa_add_32 TECH=sky90 DRIVE=INV FREQ=1 MAXOPT=1 & make synth DESIGN=ppa_add_64 TECH=sky90 DRIVE=INV FREQ=10 MAXOPT=1 & make synth DESIGN=ppa_add_16 TECH=sky90 DRIVE=INV FREQ=4000 MAXOPT=1 & make synth DESIGN=ppa_add_32 TECH=sky90 DRIVE=INV FREQ=4000 MAXOPT=1 & diff --git a/synthDC/ppaData.csv b/synthDC/ppaData.csv new file mode 100644 index 000000000..7cea16f7d --- /dev/null +++ b/synthDC/ppaData.csv @@ -0,0 +1,37 @@ +Module,Width,Target Freq,Delay,Area +add,16,10,2.032906,221.479998 +add,16,4000,0.249839,551.740010 +add,16,5000,0.228259,924.140017 +add,16,6000,0.225754,1120.140018 +add,32,10,4.160501,456.679995 +add,32,4000,0.280842,1730.680031 +add,32,5000,0.250500,1933.540033 +add,32,6000,0.271774,1746.360030 +add,64,10,8.474034,927.079988 +add,64,4000,0.323267,3758.300065 +add,64,5000,0.334061,3798.480071 +add,64,6000,0.328457,3749.480066 +comparator,16,10,0.576329,252.840005 +comparator,16,4000,0.249312,280.280005 +comparator,16,5000,0.199026,313.600006 +comparator,16,6000,0.166568,422.380007 +comparator,32,10,0.765874,495.880010 +comparator,32,4000,0.249950,608.580012 +comparator,32,5000,0.205372,919.240014 +comparator,32,6000,0.201200,1248.520016 +comparator,64,10,0.561562,1008.420020 +comparator,64,4000,0.249905,1437.660027 +comparator,64,5000,0.219296,2738.120023 +comparator,64,6000,0.221138,2341.220025 +mult,16,10,4.730546,3869.040009 
+mult,16,4000,0.821111,9132.620147 +mult,16,5000,0.820059,9583.420143 +mult,16,6000,0.831308,8594.600132 +mult,32,10,7.575772,12412.680067 +mult,32,4000,1.091389,31262.980534 +mult,32,5000,1.092153,31497.200524 +mult,32,6000,1.084816,33519.920555 +mult,64,10,4.793300,46798.920227 +mult,64,4000,1.411752,93087.261425 +mult,64,5000,1.404875,94040.801492 +mult,64,6000,1.415466,89931.661403