filled in ppa.sv, madzscript.py now synthesizes in parallel and puts results in csv

This commit is contained in:
mmasserfrye 2022-05-12 07:22:06 +00:00
parent 9dd378098f
commit 6cba6a92ba
15 changed files with 823 additions and 68 deletions

@ -1 +1 @@
Subproject commit 2d2aaa7b85c60219c591555b647dfa1785ffe1b3
Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9

@ -1 +1 @@
Subproject commit effd553a6a91ed9b0ba251796a8a44505a45174f
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86

@ -1 +1 @@
Subproject commit cb4295f9ce5da2881d7746015a6105adb8f09071
Subproject commit a7e27bc046405f0dbcde091be99f5a5d564e2172

@ -1 +1 @@
Subproject commit 3e2bf06b071a77ae62c09bf07c5229d1f9397d94
Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7

35
examples/C/fir/fir.S Normal file
View File

@ -0,0 +1,35 @@
// fir.s
// mmasserfrye@hmc.edu 30 January 2022
// FIR filter
// a0 = N, a1 = M, a2 = &X, a3 = &c, a4 = &Y
//
// For n = 0 .. N-1 computes
//   Y[n] = sum over i = 0 .. M-1 of c[i] * X[n - i + (M-1)]
// X, c and Y are accessed with fld/fsd, i.e. as arrays of 8-byte doubles, so
// every loop counter below is kept pre-scaled by 8 and used as a byte offset.
// The largest X index read is (N-1) + (M-1), so X must hold at least
// N + M - 1 elements.
// Registers written: t0-t6, f0, f1, f3. Nothing is saved or restored.
.global fir
fir:
li t0, 0 # n = 0 = t0
slli t6, a0, 3 # N*8
slli t5, a1, 3 # M*8
addi t4, t5, -8 # (M-1)*8
// outer loop: one output sample Y[n] per iteration
for1:
bge t0, t6, end # exit outer for if n >= N
fmv.d.x f3, zero # sum = 0 = f3
li t2, 0 # i = 0 = t2
add t1, t4, t0 # [(M-1) + n]*8
// inner loop: accumulate the M products for the current n
for2:
bge t2, t5, for1end # exit inner for if i >= M
sub t3, t1, t2 # [(M-1) + n - i]*8
add t3, t3, a2 # t3 = offset + &X
fld f0, 0(t3) # X[n-i+(M-1)]
add t3, t2, a3 # t3 = offset + &c
fld f1, 0(t3) # c[i]
fmadd.d f3, f0, f1, f3 # sum += c[i]*X[n-i+(M-1)]
addi t2, t2, 8 # i++
j for2
// inner loop finished: store the accumulated sum and advance to the next n
for1end:
add t3, t0, a4 # t3 = offset + &Y
fsd f3, 0(t3) # Y[n] = sum
addi t0, t0, 8 # n++
j for1
end:
ret

View File

@ -0,0 +1,33 @@
# Builds $(TARGET) from $(TARGET).c with the bare-metal RISC-V toolchain.
# The default (first) goal produces the disassembly listing and then also
# runs the program on spike.
TARGET = matMult
$(TARGET).objdump: $(TARGET)
riscv64-unknown-elf-objdump -S -D $(TARGET) > $(TARGET).objdump
spike $(TARGET)
$(TARGET): $(TARGET).c Makefile
riscv64-unknown-elf-gcc -o $(TARGET) -g -O\
-march=rv64gc -mabi=lp64d -mcmodel=medany \
-nostdlib -static -lm -fno-tree-loop-distribute-patterns \
-T../common/test.ld -I../common \
$(TARGET).c ../common/crt.S ../common/syscalls.c
# Compiler flags:
# -o $(TARGET) defines the name of the output file
# -g generates debugging symbols for gdb
# -O turns on basic optimization; -O3 turns on heavy optimization; omit for no optimization
# -march=rv64gc -mabi=lp64d -mcmodel=medany generates code for RV64GC with doubles and long/ptrs = 64 bits
# -static forces static linking (no dynamic shared libraries on bare metal)
# -lm links the math library if necessary (when #include math.h)
# -nostdlib avoids inserting standard startup files and default libraries
# because we are using crt.s on bare metal
# -fno-tree-loop-distribute-patterns turns off replacing loops with memcpy/memset in the std library
# -T specifies the linker file
# -I specifies the include path (e.g. for util.h)
# The last line defines the C files to compile.
# crt.S is needed as our startup file to initialize the processor
# syscalls.c implements printf through the HTIF for Spike
# other flags from riscv-tests makefiles that don't seem to be important
# -ffast-math -DPREALLOCATE=1 -std=gnu99 \
# -fno-common -fno-builtin-printf -nostartfiles -lgcc \
clean:
rm -f $(TARGET) $(TARGET).objdump

BIN
examples/C/lab1matrix/matMult Executable file

Binary file not shown.

View File

@ -0,0 +1,87 @@
// matMult.c
// mmasserfrye@hmc.edu 30 January 2022
#include <stdio.h> // supports printf
#include <math.h> // supports fabs
#include "util.h" // supports verify
// copies row number `row` of the row-major matrix mat (n elements per row)
// into arr[0] .. arr[n-1]
void getRow(int n, int row, double *mat, double *arr){
  int col = 0;
  while (col < n) {
    arr[col] = mat[col + row*n];
    col++;
  }
}
// returns a[0]*b[0] + ... + a[n-1]*b[n-1]; returns 0 when n <= 0
double dotproduct(int n, double a[], double b[]) {
  volatile int idx;   // kept volatile, exactly as the loop counter was before
  double acc = 0;
  idx = 0;
  while (idx < n) {
    if (idx == 0) acc = 0;
    acc += a[idx]*b[idx];
    idx++;
  }
  return acc;
}
// computes Y = A * B for row-major matrices: A is m1 x n1m2, B is n1m2 x n2,
// and Y receives the m1 x n2 product
void mult(int m1, int n1m2, int n2, double *A, double *B, double *Y) {
  // Bt holds B transposed (n2 x n1m2) so that each column of B can be
  // fetched as a contiguous row
  double Bt[n2*n1m2];
  double Arow[n1m2];
  double Bcol[n1m2];

  for (int r = 0; r < n1m2; r++) {
    for (int c = 0; c < n2; c++) {
      Bt[c*n1m2 + r] = B[r*n2 + c];
    }
  }

  // results are produced in row-major order, so Y is walked with one cursor
  double *out = Y;
  for (int r = 0; r < m1; r++) {
    for (int c = 0; c < n2; c++) {
      getRow(n1m2, r, A, Arow);
      getRow(n1m2, c, Bt, Bcol);
      *out++ = dotproduct(n1m2, Arow, Bcol);
    }
  }
}
// Test driver: multiplies an m x n matrix by an n x m matrix (both stored in X)
// into the m x m matrix Y, with setStats(1)/setStats(0) bracketing the call.
int main(void) {
  // change these bits to test stuff
  int m = 20;
  int n = 1;
  double X[20]; // change to m*n
  double Y[400]; // change to m^2
  // fill in some numbers so the test feels legit
  // mult reads all m*n elements of X (as A and as B), so every one of them
  // must be initialized, not just the first n
  for (int i=0; i<m*n; i++){
    X[i] = i;
  }
  setStats(1);
  mult(m, n, m, X, X, Y);
  setStats(0);
  /*
  // use this code from Harris's fir.c to print matrix one element at a time
  // library linked doesn't support printing doubles, so convert to integers to print
  for (int i=0; i<m*m; i++) {
    int tmp = Y[i];
    printf("Y[%d] = %d\n", i, tmp);
  }
  */
  return 0;
}

BIN
examples/C/sum_mixed/sum_mixed Executable file

Binary file not shown.

62
pipelined/src/fma/wave.do Normal file
View File

@ -0,0 +1,62 @@
# Wave-window setup for testbench_fma16: adds the signals listed below,
# restores two cursors and the zoom range, and sets the display options.
# NOTE(review): the layout looks tool-generated (simulator "save format"
# output) - presumably regenerate rather than hand-edit; confirm.
onerror {resume}
quietly WaveActivateNextPane {} 0
# testbench-level signals
add wave -noupdate /testbench_fma16/clk
add wave -noupdate /testbench_fma16/reset
add wave -noupdate /testbench_fma16/x
add wave -noupdate /testbench_fma16/y
add wave -noupdate /testbench_fma16/z
add wave -noupdate /testbench_fma16/result
add wave -noupdate /testbench_fma16/rexpected
# signals inside the device under test (instance dut)
add wave -noupdate /testbench_fma16/dut/x
add wave -noupdate /testbench_fma16/dut/y
add wave -noupdate /testbench_fma16/dut/z
add wave -noupdate /testbench_fma16/dut/mul
add wave -noupdate /testbench_fma16/dut/add
add wave -noupdate /testbench_fma16/dut/negr
add wave -noupdate /testbench_fma16/dut/negz
add wave -noupdate /testbench_fma16/dut/roundmode
add wave -noupdate /testbench_fma16/dut/result
add wave -noupdate /testbench_fma16/dut/XManE
add wave -noupdate /testbench_fma16/dut/YManE
add wave -noupdate /testbench_fma16/dut/ZManE
add wave -noupdate /testbench_fma16/dut/XExpE
add wave -noupdate /testbench_fma16/dut/YExpE
add wave -noupdate /testbench_fma16/dut/ZExpE
add wave -noupdate /testbench_fma16/dut/PExpE
add wave -noupdate /testbench_fma16/dut/Ne
add wave -noupdate /testbench_fma16/dut/upOneExt
add wave -noupdate /testbench_fma16/dut/XSgnE
add wave -noupdate /testbench_fma16/dut/YSgnE
add wave -noupdate /testbench_fma16/dut/ZSgnE
add wave -noupdate /testbench_fma16/dut/PSgnE
add wave -noupdate /testbench_fma16/dut/ProdManE
add wave -noupdate /testbench_fma16/dut/NfracS
add wave -noupdate /testbench_fma16/dut/ProdManAl
add wave -noupdate /testbench_fma16/dut/ZManExt
add wave -noupdate /testbench_fma16/dut/ZManAl
add wave -noupdate /testbench_fma16/dut/Nfrac
add wave -noupdate /testbench_fma16/dut/res
# AlignCnt is the only signal displayed in decimal radix
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
add wave -noupdate /testbench_fma16/dut/NSamt
add wave -noupdate /testbench_fma16/dut/ZExpGreater
add wave -noupdate /testbench_fma16/dut/ACLess
add wave -noupdate /testbench_fma16/dut/upOne
add wave -noupdate /testbench_fma16/dut/KillProd
TreeUpdate [SetDefaultTree]
# saved cursor positions; cursor 2 is made the active one
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2
# display options for the wave window
configure wave -namecolwidth 237
configure wave -valuecolwidth 64
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
# saved zoom range
WaveRestoreZoom {4083 ns} {4235 ns}

View File

@ -1,5 +1,6 @@
// ppa.sv
// Teo Ene & David_Harris@hmc.edu 25 Feb 2021
// Teo Ene & David_Harris@hmc.edu 11 May 2022
// & mmasserfrye@hmc.edu
// Measure PPA of various building blocks
module ppa_comparator_16 #(parameter WIDTH=16) (
@ -26,8 +27,7 @@ module ppa_comparator_64 #(parameter WIDTH=64) (
ppa_comparator #(WIDTH) comp (.*);
endmodule
module ppa_comparator #(parameter WIDTH=16) (
module ppa_comparator #(parameter WIDTH=16) (
input logic [WIDTH-1:0] a, b,
input logic sgnd,
output logic [1:0] flags);
@ -45,39 +45,44 @@ endmodule
assign flags = {eq, lt};
endmodule
module ppa_add_16 #(parameter WIDTH=16) (
input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y);
assign y = a + b;
endmodule
module ppa_add_32 #(parameter WIDTH=32) (
input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y
);
output logic [WIDTH-1:0] y);
assign y = a + b;
endmodule
module ppa_add_64 #(parameter WIDTH=64) (
input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y
);
output logic [WIDTH-1:0] y);
assign y = a + b;
endmodule
module ppa_add_16 #(parameter WIDTH=16) (
input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y
);
assign y = a + b;
endmodule
module ppa_shiftleft(
assign y = a << amt;
)
module ppa_mult(
module ppa_mult_16 #(parameter WIDTH=16) (
input logic [WIDTH-1:0] a, b,
output logic [WIDTH*2-1:0] y); //is this right width
assign y = a * b;
)
endmodule
module ppa_mult_32 #(parameter WIDTH=32) (
input logic [WIDTH-1:0] a, b,
output logic [WIDTH*2-1:0] y); //is this right width
assign y = a * b;
endmodule
module ppa_mult_64 #(parameter WIDTH=64) (
input logic [WIDTH-1:0] a, b,
output logic [WIDTH*2-1:0] y); //is this right width
assign y = a * b;
endmodule
module ppa_alu #(parameter WIDTH=32) (
input logic [WIDTH-1:0] A, B,
@ -137,7 +142,13 @@ module ppa_alu #(parameter WIDTH=32) (
else assign Result = FullResult;
endmodule
module ppa_shiftleft #(parameter WIDTH=32) (
input logic [WIDTH-1:0] a,
input logic [$clog2(WIDTH)-1:0] amt,
output logic [WIDTH-1:0] y);
assign y = a << amt;
endmodule
module ppa_shifter (
input logic [`XLEN-1:0] A,
@ -186,11 +197,10 @@ endmodule
module ppa_prioritythermometer #(parameter N = 8) (
input logic [N-1:0] a,
output logic [N-1:0] y
);
output logic [N-1:0] y);
// Carefully crafted so design compiler will synthesize into a fast tree structure
// Rather than linear.
// Carefully crafted so design compiler will synthesize into a fast tree structure
// Rather than linear.
// create thermometer code mask
genvar i;
@ -200,12 +210,9 @@ module ppa_prioritythermometer #(parameter N = 8) (
end
endmodule
module ppa_priorityonehot #(parameter N = 8) (
input logic [N-1:0] a,
output logic [N-1:0] y
);
output logic [N-1:0] y);
logic [N-1:0] nolower;
// create thermometer code mask
@ -215,10 +222,9 @@ endmodule
module ppa_prioriyencoder #(parameter N = 8) (
input logic [N-1:0] a,
output logic [$clog2(N)-1:0] y
);
// Carefully crafted so design compiler will synthesize into a fast tree structure
// Rather than linear.
output logic [$clog2(N)-1:0] y);
// Carefully crafted so design compiler will synthesize into a fast tree structure
// Rather than linear.
// create thermometer code mask
genvar i;
@ -227,13 +233,14 @@ module ppa_prioriyencoder #(parameter N = 8) (
end
endmodule
// One-hot decoder: drives y[a] high and every other bit of y low.
// N is the number of output bits; the select input a is $clog2(N) bits wide.
// N was previously used without being declared; it is now a parameter with
// the same default (8) as the other N-parameterized modules in this file.
module decoder #(parameter N = 8) (
  input  logic [$clog2(N)-1:0] a,
  output logic [N-1:0]         y);

  always_comb begin
    y = 0;
    y[a] = 1;
  end
endmodule
module mux2 #(parameter WIDTH = 8) (
input logic [WIDTH-1:0] d0, d1,
@ -267,12 +274,12 @@ module mux6 #(parameter WIDTH = 8) (
assign y = s[2] ? (s[0] ? d5 : d4) : (s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0));
endmodule
// 8:1 multiplexer: y = d<s>, i.e. s = 3'b000 selects d0 ... s = 3'b111 selects d7
// (same select convention as mux6 above).
module mux8 #(parameter WIDTH = 8) (
  input  logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5, d6, d7,
  input  logic [2:0]       s,
  output logic [WIDTH-1:0] y);

  // s[2] picks the upper (d4..d7) or lower (d0..d3) half, s[1] the pair
  // within that half, s[0] the element within the pair
  assign y = s[2] ? (s[1] ? (s[0] ? d7 : d6) : (s[0] ? d5 : d4))
                  : (s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0));
endmodule
// *** some way to express data-critical inputs
@ -315,5 +322,3 @@ module flopenr #(parameter WIDTH = 8) (
if (reset) q <= #1 0;
else if (en) q <= #1 d;
endmodule

View File

@ -0,0 +1,473 @@
///////////////////////////////////////////
// testbench.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Wally Testbench and helper modules
// Applies test programs from the riscv-arch-test and Imperas suites
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`include "tests.vh"
// Top-level testbench. Selects a list of test programs from the TEST
// parameter, loads each program's memfile into the DUT memories, waits for
// DCacheFlushDone, then compares memory (and the cache shadow copy) against
// the program's reference signature file.
//
// Layout of tests[], as used below: tests[0] is a decimal index into tvpaths;
// after that come (test name, signature start address as a hex string) pairs,
// which is why `test` starts at 1 and advances by 2.
module testbench;
  parameter TESTSPERIPH = 0; // set to 0 for regression
  parameter TESTSPRIV = 0; // set to 0 for regression
  parameter DEBUG=0;
  parameter TEST="none";

  logic clk;
  logic reset_ext, reset;

  parameter SIGNATURESIZE = 5000000;

  int test, i, errors, totalerrors;
  logic [31:0] sig32[0:SIGNATURESIZE];
  logic [`XLEN-1:0] signature[0:SIGNATURESIZE];
  logic [`XLEN-1:0] testadr;
  string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
  logic [31:0] InstrW;

  string tests[];
  logic [3:0] dummy;

  string ProgramAddrMapFile, ProgramLabelMapFile;

  logic [`AHBW-1:0] HRDATAEXT;
  logic HREADYEXT, HRESPEXT;
  logic [31:0] HADDR;
  logic [`AHBW-1:0] HWDATA;
  logic HWRITE;
  logic [2:0] HSIZE;
  logic [2:0] HBURST;
  logic [3:0] HPROT;
  logic [1:0] HTRANS;
  logic HMASTLOCK;
  logic HCLK, HRESETn;
  logic [`XLEN-1:0] PCW;

  logic DCacheFlushDone, DCacheFlushStart;

  // carry PC and instruction from the M stage into a W-stage copy
  flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
  flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);

  // check assertions for a legal configuration
  riscvassertions riscvassertions();

  // pick tests based on modes supported
  initial begin
    $display("TEST is %s", TEST);
    //tests = '{};
    if (`XLEN == 64) begin // RV64
      case (TEST)
        "arch64i": tests = arch64i;
        "arch64priv": tests = arch64priv;
        "arch64c": if (`C_SUPPORTED)
                     if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv};
                     else tests = {arch64c};
        "arch64m": if (`M_SUPPORTED) tests = arch64m;
        "arch64d": if (`D_SUPPORTED) tests = arch64d;
        "imperas64i": tests = imperas64i;
        "imperas64p": tests = imperas64p;
        // "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
        "imperas64f": if (`F_SUPPORTED) tests = imperas64f;
        "imperas64d": if (`D_SUPPORTED) tests = imperas64d;
        "imperas64m": if (`M_SUPPORTED) tests = imperas64m;
        "imperas64a": if (`A_SUPPORTED) tests = imperas64a;
        "imperas64c": if (`C_SUPPORTED) tests = imperas64c;
                      else tests = imperas64iNOc;
        "testsBP64": tests = testsBP64;
        "wally64i": tests = wally64i; // *** redo
        "wally64priv": tests = wally64priv;// *** redo
        "imperas64periph": tests = imperas64periph;
        "coremark": tests = coremark;
      endcase
    end else begin // RV32
      case (TEST)
        "arch32i": tests = arch32i;
        "arch32priv": tests = arch32priv;
        "arch32c": if (`C_SUPPORTED)
                     if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv};
                     else tests = {arch32c};
        "arch32m": if (`M_SUPPORTED) tests = arch32m;
        "arch32f": if (`F_SUPPORTED) tests = arch32f;
        "imperas32i": tests = imperas32i;
        "imperas32p": tests = imperas32p;
        // "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
        "imperas32f": if (`F_SUPPORTED) tests = imperas32f;
        "imperas32m": if (`M_SUPPORTED) tests = imperas32m;
        "imperas32a": if (`A_SUPPORTED) tests = imperas32a;
        "imperas32c": if (`C_SUPPORTED) tests = imperas32c;
                      else tests = imperas32iNOc;
        "wally32i": tests = wally32i; // *** redo
        "wally32e": tests = wally32e;
        "wally32priv": tests = wally32priv; // *** redo
        "imperas32periph": tests = imperas32periph;
      endcase
    end
    // an unknown TEST name, or one whose extension is not enabled, leaves tests empty
    if (tests.size() == 0) begin
      $display("TEST %s not supported in this configuration", TEST);
      $stop;
    end
  end

  string signame, memfilename, pathname;

  logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
  logic UARTSin, UARTSout;

  logic SDCCLK;
  logic SDCCmdIn;
  logic SDCCmdOut;
  logic SDCCmdOE;
  logic [3:0] SDCDatIn;

  logic HREADY;
  logic HSELEXT;

  // instantiate device to be tested
  assign GPIOPinsIn = 0;
  assign UARTSin = 1;
  assign HREADYEXT = 1;
  assign HRESPEXT = 0;
  assign HRDATAEXT = 0;

  wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
                        .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
                        .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
                        .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK);

  // Track names of instructions
  instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
                    dut.core.ifu.FinalInstrRawF[31:0],
                    dut.core.ifu.InstrD, dut.core.ifu.InstrE,
                    dut.core.ifu.InstrM, InstrW,
                    InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);

  // initialize tests
  localparam integer MemStartAddr = `RAM_BASE>>(1+`XLEN/32);
  localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32);

  initial
    begin
      test = 1;
      totalerrors = 0;
      testadr = 0;
      // fill memory with defined values to reduce Xs in simulation
      // Quick note the memory will need to be initialized. The C library does not
      // guarantee the initialized reads. For example a strcmp can read 6 byte
      // strings, but uses a load double to read them in. If the last 2 bytes are
      // not initialized the compare results in an 'x' which propagates through
      // the design.
      // NOTE(review): this path is dut.uncore.ram.ram.RAM while the $readmemh
      // calls below use dut.uncore.ram.RAM - confirm which hierarchy is current.
      if (TEST == "coremark")
        for (i=MemStartAddr; i<MemEndAddr; i = i+1)
          dut.uncore.ram.ram.RAM[i] = 64'h0;

      // read test vectors into memory
      pathname = tvpaths[tests[0].atoi()];
      /* if (tests[0] == `IMPERASTEST)
       pathname = tvpaths[0];
       else pathname = tvpaths[1]; */
      memfilename = {pathname, tests[test], ".elf.memfile"};
      if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
      else $readmemh(memfilename, dut.uncore.ram.RAM);
      if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);

      ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
      ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
      $display("Read memfile %s", memfilename);
      reset_ext = 1; # 42; reset_ext = 0;
    end

  // generate clock to sequence tests
  always
    begin
      clk = 1; # 5; clk = 0; # 5;
      // if ($time % 100000 == 0) $display("Time is %0t", $time);
    end

  // check results
  always @(negedge clk)
    begin
      if (TEST == "coremark")
        if (dut.core.priv.priv.ecallM) begin
          $display("Benchmark: coremark is done.");
          $stop;
        end
      if (DCacheFlushDone) begin
        #600; // give time for instructions in pipeline to finish
        // clear signature to prevent contamination from previous tests
        for(i=0; i<SIGNATURESIZE; i=i+1) begin
          sig32[i] = 'bx;
        end

        // read signature, reformat in 64 bits if necessary
        signame = {pathname, tests[test], ".signature.output"};
        $readmemh(signame, sig32);
        i = 0;
        while (i < SIGNATURESIZE) begin
          if (`XLEN == 32) begin
            signature[i] = sig32[i];
            i = i+1;
          end else begin
            // two 32-bit signature words form one 64-bit word, low word first
            signature[i/2] = {sig32[i+1], sig32[i]};
            i = i + 2;
          end
          if (i >= 4 & sig32[i-4] === 'bx) begin
            if (i == 4) begin
              i = SIGNATURESIZE+1; // flag empty file
              $display(" Error: empty test file");
            end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
          end
        end

        // Check errors
        errors = (i == SIGNATURESIZE+1); // error if file is empty
        i = 0;
        // word index of the signature region: RAM_BASE plus the per-test hex offset
        testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8);
        /* verilator lint_off INFINITELOOP */
        while (signature[i] !== 'bx) begin
          logic [`XLEN-1:0] sig;
          if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.ram.RAM[testadr+i];
          else sig = dut.uncore.ram.RAM[testadr+i];
          // $display("signature[%h] = %h sig = %h", i, signature[i], sig);
          // a word counts as wrong only if it matches neither memory nor the cache shadow copy
          if (signature[i] !== sig &
              //if (signature[i] !== dut.core.lsu.dtim.ram.RAM[testadr+i] &
              (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1?
            if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
              // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
              // report errors unless they are garbage at the end of the sim
              // kind of hacky test for garbage right now
              $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
              errors = errors+1;
              $display(" Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h",
                       tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
              // tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.RAM[testadr+i], signature[i]);
              $stop;//***debug
            end
          end
          i = i + 1;
        end
        /* verilator lint_on INFINITELOOP */
        if (errors == 0) begin
          $display("%s succeeded. Brilliant!!!", tests[test]);
        end
        else begin
          $display("%s failed with %d errors. :(", tests[test], errors);
          totalerrors = totalerrors+1;
        end
        test = test + 2;
        if (test == tests.size()) begin
          if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
          else $display("FAIL: %d test programs had errors", totalerrors);
          $stop;
        end
        else begin
          //pathname = tvpaths[tests[0]];
          memfilename = {pathname, tests[test], ".elf.memfile"};
          //$readmemh(memfilename, dut.uncore.ram.ram.RAM);
          if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
          else $readmemh(memfilename, dut.uncore.ram.RAM);
          if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);

          ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
          ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
          $display("Read memfile %s", memfilename);
          // pulse reset for the next program; it must be released again,
          // otherwise the DUT stays in reset and no further test ever runs
          reset_ext = 1; # 47; reset_ext = 0;
        end
      end
    end // always @ (negedge clk)

  // track the current function or global label
  if (DEBUG == 1) begin : FunctionName
    FunctionName FunctionName(.reset(reset),
                              .clk(clk),
                              .ProgramAddrMapFile(ProgramAddrMapFile),
                              .ProgramLabelMapFile(ProgramLabelMapFile));
  end

  // Termination condition
  // terminate on a specific ECALL after li x3,1 for old Imperas tests, *** remove this when old imperas tests are removed
  // or sw gp,-56(t0) for new Imperas tests
  // or sd gp, -56(t0)
  // or on a jump to self infinite loop (6f) for RISC-V Arch tests
  logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls
  if (`ZICSR_SUPPORTED) assign ecf = dut.core.priv.priv.EcallFaultM;
  else assign ecf = 0;
  assign DCacheFlushStart = ecf &
                            (dut.core.ieu.dp.regf.rf[3] == 1 |
                             (dut.core.ieu.dp.regf.we3 &
                              dut.core.ieu.dp.regf.a3 == 3 &
                              dut.core.ieu.dp.regf.wd3 == 1)) |
                            (dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM;

  DCacheFlushFSM DCacheFlushFSM(.clk(clk),
                                .reset(reset),
                                .start(DCacheFlushStart),
                                .done(DCacheFlushDone));

  // initialize the branch predictor
  if (`BPRED_ENABLED == 1)
    initial begin
      $readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
      $readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem);
    end
endmodule
// Checks at time 0 that the configuration macros describe a legal
// combination. Each failed check reports through $error (or $warning for the
// RAM_RANGE check). Two diagnostic messages were corrected: "vitual" ->
// "virtual" and "ZISR" -> "ZICSR" (the macro actually tested).
module riscvassertions;
  initial begin
    assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
    assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support");
    assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
    assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
    assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
    assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
    assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and virtual memory is enabled (to prevent aliasing)");
    assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
    assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
    assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and virtual memory is enabled (to prevent aliasing)");
    assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled");
    assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size");
    assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2");
    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
    assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2");
    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2");
    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2");
    assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF");
    assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
    assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported");
    assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
    // assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
    assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
    assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
  end
endmodule
/* verilator lint_on STMTDLY */
/* verilator lint_on WIDTH */
// Testbench helper that mirrors dirty data-cache contents into ShadowRAM.
// When DMEM is MEM_CACHE, one copyShadow instance per (line, way, word)
// samples the cache arrays on a clock edge where start is high; two time
// units later every word that was both valid and dirty is written into
// ShadowRAM at its word address. done is start passed through the doneReg
// flop (presumably a one-cycle delay - flop is defined elsewhere).
module DCacheFlushFSM
(input logic clk,
input logic reset,
input logic start,
output logic done);
// NOTE(review): genvar adr and input reset are not referenced in this module.
genvar adr;
// word-indexed copy of RAM; bounds use the same expressions as the
// testbench's MemStartAddr/MemEndAddr
logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)];
if(`DMEM == `MEM_CACHE) begin
// cache geometry is read hierarchically from the dcache instance
localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN;
localparam integer lognumlines = $clog2(numlines);
localparam integer loglinebytelen = $clog2(linebytelen);
// NOTE(review): lognumways is computed but not used below.
localparam integer lognumways = $clog2(numways);
// bit position where the tag field starts within a physical address
localparam integer tagstart = lognumlines + loglinebytelen;
genvar index, way, cacheWord;
// per-word snapshots written by the copyShadow instances, indexed [way][line][word]
logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
end
integer i, j, k;
always @(posedge clk) begin
// the two #1 delays put this read after the copyShadow instances, which
// sample on the same clock edge, have updated their outputs
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
// CacheAdr is a byte address; shift it down to a word index
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
end
end
end
end
end
end
end
flop #(1) doneReg(.clk, .d(start), .q(done));
endmodule
// Captures one cache word's state on a rising clock edge where start is high:
// tag, valid, dirty and data are copied to the Cache* outputs, and CacheAdr
// is rebuilt from the tag, the line index and the word position in the line.
//   tagstart       - bit position of the tag field within the address
//   loglinebytelen - shift applied to index (log2 of the line length in bytes,
//                    as passed by DCacheFlushFSM)
module copyShadow
#(parameter tagstart, loglinebytelen)
(input logic clk,
input logic start,
input logic [`PA_BITS-1:tagstart] tag,
input logic valid, dirty,
input logic [`XLEN-1:0] data,
input logic [32-1:0] index,
input logic [32-1:0] cacheWord,
output logic [`XLEN-1:0] CacheData,
output logic [`PA_BITS-1:0] CacheAdr,
output logic [`XLEN-1:0] CacheTag,
output logic CacheValid,
output logic CacheDirty);
// NOTE(review): blocking assignments inside always_ff; DCacheFlushFSM reads
// these outputs two time units after the edge - confirm before changing to <=.
always_ff @(posedge clk) begin
if(start) begin
CacheTag = tag;
CacheValid = valid;
CacheDirty = dirty;
CacheData = data;
// byte address = tag field | line index field | word offset within the line
CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8));
end
end
endmodule

View File

@ -1,36 +1,59 @@
#!/usr/bin/python3
# from msilib.schema import File
import subprocess
from multiprocessing import Pool
import csv
import re
def run_command(module, width, freq):
    """Synthesize design ppa_<module>_<width> at target frequency <freq>.

    Runs `make synth` through the shell and blocks until it has finished.
    Waiting matters: the worker pool treats a task as done when this function
    returns, and the report files are grepped right after the pool completes.
    With a bare Popen the function returned immediately, so the reports could
    be read before synthesis had produced them.
    """
    command = "make synth DESIGN=ppa_{}_{} TECH=sky90 DRIVE=INV FREQ={} MAXOPT=1".format(module, width, freq)
    subprocess.run(command, shell=True)
bashCommand = "find . | grep ppa_timing.rep"
output = subprocess.check_output(['bash','-c', bashCommand])
files = output.decode("utf-8").split('\n')
print(files)
widths = ['16', '32', '64']
modules = ['mult']
freqs = ['10', '4000', '5000', '6000']
widths = []
areas = []
delays = []
LoT = []
for module in modules:
for width in widths:
for freq in freqs:
LoT += [[module, width, freq]]
for file in files:
widths += [pullNum('ports', file)/3]
areas += [pullNum('Total cell area', file)]
delays += [pullNum('delay', file)]
pool = Pool()
pool.starmap(run_command, LoT)
bashCommand = "grep 'Critical Path Length' runs/ppa_*/reports/*qor*"
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
linesCPL = outputCPL.decode("utf-8").split('\n')[:-1]
def pullNum(keyText, file):
return
bashCommand = "grep 'Design Area' runs/ppa_*/reports/*qor*"
outputDA = subprocess.check_output(['bash','-c', bashCommand])
linesDA = outputDA.decode("utf-8").split('\n')[:-1]
# File_object = open("greppedareas","r")
# content = File_object.readlines()
# File_object.close()
# Patterns used to pull fields out of the grep output lines.
# Raw strings keep the backslash escapes (\d, \w) from being treated as
# invalid string escapes by Python.
# cpl: critical path length; accepts any number of integer digits so that a
# delay of 10 ns or more is not truncated to its last integer digit
cpl = re.compile(r'\d+\.\d{6}')
# f: the "_<freq>_MHz" part of the run directory name
f = re.compile(r'_\d*_MHz')
# wm: the "ppa_<module>_<width>_qor" part of the report file name
wm = re.compile(r'ppa_\w*_\d*_qor')
# da: design area
da = re.compile(r'\d*\.\d{6}')
# LoT = []
# for line in content:
# l = line.split(':')
# LoT += [float(l[2])]
allSynths = []
# avg = sum(LoT)/len(LoT)
for i in range(len(linesCPL)):
line = linesCPL[i]
oneSynth = []
mwm = wm.findall(line)[0][4:-4].split('_')
oneSynth += [mwm[0]]
oneSynth += [mwm[1]]
oneSynth += [f.findall(line)[0][1:-4]]
oneSynth += cpl.findall(line)
oneSynth += da.findall(linesDA[i])
allSynths += [oneSynth]
# print(avg)
file = open("ppaData.csv", "w")
writer = csv.writer(file)
writer.writerow(['Module', 'Width', 'Target Freq', 'Delay', 'Area'])
for one in allSynths:
writer.writerow(one)
file.close()

View File

@ -3,7 +3,7 @@
# Run PPA experiments on different modules
rm -rf runs/ppa*
make synth DESIGN=ppa_add_16 TECH=sky90 DRIVE=INV FREQ=10 MAXOPT=1 &
make synth DESIGN=ppa_add_32 TECH=sky90 DRIVE=INV FREQ=1 MAXOPT=10 &
make synth DESIGN=ppa_add_32 TECH=sky90 DRIVE=INV FREQ=10 MAXOPT=1 &
make synth DESIGN=ppa_add_64 TECH=sky90 DRIVE=INV FREQ=10 MAXOPT=1 &
make synth DESIGN=ppa_add_16 TECH=sky90 DRIVE=INV FREQ=4000 MAXOPT=1 &
make synth DESIGN=ppa_add_32 TECH=sky90 DRIVE=INV FREQ=4000 MAXOPT=1 &

37
synthDC/ppaData.csv Normal file
View File

@ -0,0 +1,37 @@
Module,Width,Target Freq,Delay,Area
add,16,10,2.032906,221.479998
add,16,4000,0.249839,551.740010
add,16,5000,0.228259,924.140017
add,16,6000,0.225754,1120.140018
add,32,10,4.160501,456.679995
add,32,4000,0.280842,1730.680031
add,32,5000,0.250500,1933.540033
add,32,6000,0.271774,1746.360030
add,64,10,8.474034,927.079988
add,64,4000,0.323267,3758.300065
add,64,5000,0.334061,3798.480071
add,64,6000,0.328457,3749.480066
comparator,16,10,0.576329,252.840005
comparator,16,4000,0.249312,280.280005
comparator,16,5000,0.199026,313.600006
comparator,16,6000,0.166568,422.380007
comparator,32,10,0.765874,495.880010
comparator,32,4000,0.249950,608.580012
comparator,32,5000,0.205372,919.240014
comparator,32,6000,0.201200,1248.520016
comparator,64,10,0.561562,1008.420020
comparator,64,4000,0.249905,1437.660027
comparator,64,5000,0.219296,2738.120023
comparator,64,6000,0.221138,2341.220025
mult,16,10,4.730546,3869.040009
mult,16,4000,0.821111,9132.620147
mult,16,5000,0.820059,9583.420143
mult,16,6000,0.831308,8594.600132
mult,32,10,7.575772,12412.680067
mult,32,4000,1.091389,31262.980534
mult,32,5000,1.092153,31497.200524
mult,32,6000,1.084816,33519.920555
mult,64,10,4.793300,46798.920227
mult,64,4000,1.411752,93087.261425
mult,64,5000,1.404875,94040.801492
mult,64,6000,1.415466,89931.661403
1 Module Width Target Freq Delay Area
2 add 16 10 2.032906 221.479998
3 add 16 4000 0.249839 551.740010
4 add 16 5000 0.228259 924.140017
5 add 16 6000 0.225754 1120.140018
6 add 32 10 4.160501 456.679995
7 add 32 4000 0.280842 1730.680031
8 add 32 5000 0.250500 1933.540033
9 add 32 6000 0.271774 1746.360030
10 add 64 10 8.474034 927.079988
11 add 64 4000 0.323267 3758.300065
12 add 64 5000 0.334061 3798.480071
13 add 64 6000 0.328457 3749.480066
14 comparator 16 10 0.576329 252.840005
15 comparator 16 4000 0.249312 280.280005
16 comparator 16 5000 0.199026 313.600006
17 comparator 16 6000 0.166568 422.380007
18 comparator 32 10 0.765874 495.880010
19 comparator 32 4000 0.249950 608.580012
20 comparator 32 5000 0.205372 919.240014
21 comparator 32 6000 0.201200 1248.520016
22 comparator 64 10 0.561562 1008.420020
23 comparator 64 4000 0.249905 1437.660027
24 comparator 64 5000 0.219296 2738.120023
25 comparator 64 6000 0.221138 2341.220025
26 mult 16 10 4.730546 3869.040009
27 mult 16 4000 0.821111 9132.620147
28 mult 16 5000 0.820059 9583.420143
29 mult 16 6000 0.831308 8594.600132
30 mult 32 10 7.575772 12412.680067
31 mult 32 4000 1.091389 31262.980534
32 mult 32 5000 1.092153 31497.200524
33 mult 32 6000 1.084816 33519.920555
34 mult 64 10 4.793300 46798.920227
35 mult 64 4000 1.411752 93087.261425
36 mult 64 5000 1.404875 94040.801492
37 mult 64 6000 1.415466 89931.661403