This commit is contained in:
Katherine Parry 2022-05-26 20:48:30 +00:00
commit b13c3d5385
8 changed files with 102 additions and 188 deletions

@ -1 +1 @@
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86

View File

@ -1,15 +1,37 @@
# Makefile added 1/20/22 David_Harris@hmc.edu
# Compile Embench for Wally
all: Makefile
../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cflags "-O2 -march=rv32i -mabi=ilp32 -mcmodel=medany" --cc riscv64-unknown-elf-gcc
./benchmark_size.py
./benchmark_speed.py
all: build sim
# view with
# more `ls -t | head -1`
allClean: clean all
build:
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles"
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib" --cflags="-nostdlib" --dummy-libs="libgcc libm libc crt0"
sim: size speed
size:
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
speed:
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50
objdump:
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64.objdump
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/cubic/cubic > ../../addins/embench-iot/bd_speed/src/cubic/cubic.objdump
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/md5sum/md5sum > ../../addins/embench-iot/bd_speed/src/md5sum/md5sum.objdump
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/statemate/statemate > ../../addins/embench-iot/bd_speed/src/statemate/statemate.objdump
clean:
rm -rf ../../addins/embench-iot/bd_speed/
rm -rf ../../addins/embench-iot/bd_size/
# std:
# ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm"
# riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
# --dummy-libs="libgcc libm libc"
# --cflags "-O2 -g -nostartfiles"
#riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
# --user-libs="-lm"
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c

View File

@ -1,7 +0,0 @@
# Makefile added 1/20/22 David_Harris@hmc.edu
# Compile Embench for Wally
all: Makefile
./build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cc riscv64-unknown-elf-gcc
./benchmark_size.py
./benchmark_speed.py

View File

@ -46,7 +46,7 @@ configs = [
]
def getBuildrootTC(short):
INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
MAX_EXPECTED = 246000000
MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
if short:
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
BRgrepstr=str(INSTR_LIMIT)+" instructions"

View File

@ -47,7 +47,7 @@ module srt #(parameter Nf=52) (
input logic Int, // Choose integer inputss
input logic Sqrt, // perform square root, not divide
output logic rsign,
output logic [Nf-1:0] Quot, Rem, // *** later handle integers
output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
);
@ -91,6 +91,8 @@ module srt #(parameter Nf=52) (
signcalc signcalc(.XSign, .YSign, .calcSign);
srtpostproc postproc(rp, rm, Quot);
otfc otfc(qp, qz, qm, Quot, QuotOTFC);
endmodule
module srtpostproc #(parameter N=52) (
@ -210,9 +212,24 @@ module qacc #(parameter N=55) (
end */
endmodule
//////////
// otfc //
//////////
module otfc #(parameter N=52) (
input logic qp, qz, qm,
input logic [N-1:0] Quot,
output logic [N-1:0] QuotOTFC
);
assign QuotOTFC = Quot;
endmodule
/////////
// inv //
/////////
module inv(input logic [55:0] in,
output logic [55:0] out);

View File

@ -44,7 +44,7 @@ module testbench;
logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp;
logic asign, bsign;
logic [51:0] r;
logic [51:0] r, rOTFC;
logic [54:0] rp, rm; // positive quotient digits
// Test parameters
@ -72,7 +72,7 @@ module testbench;
.SrcXFrac(afrac), .SrcYFrac(bfrac),
.SrcA('0), .SrcB('0), .Fmt(2'b00),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
.Quot(r), .Rem(), .Flags());
.Quot(r), .QuotOTFC(rOTFC), .Rem(), .Flags());
// Counter
counter counter(clk, req, done);
@ -117,6 +117,13 @@ module testbench;
$display("failed\n");
$stop;
end
if (r !== rOTFC) // Check if OTFC works
begin
errors = errors+1;
$display("OTFC is %h, should be %h\n", rOTFC, r);
$display("failed/n");
$stop;
end
if (afrac === 52'hxxxxxxxxxxxxx)
begin
$display("%d Tests completed successfully", testnum);

View File

@ -1,123 +0,0 @@
// testbench
module testbench ();
logic [63:0] op1;
logic [63:0] op2;
logic [2:0] FOpCtrlE;
logic [2:0] FrmE;
logic op_type;
logic FmtE;
logic OvEn;
logic UnEn;
logic XSgnE, YSgnE, ZSgnE;
logic XSgnM, YSgnM;
logic [10:0] XExpE, YExpE, ZExpE;
logic [10:0] XExpM, YExpM, ZExpM;
logic [52:0] XManE, YManE, ZManE;
logic [52:0] XManM, YManM, ZManM;
logic [10:0] BiasE;
logic XNaNE, YNaNE, ZNaNE;
logic XNaNM, YNaNM, ZNaNM;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XSNaNM, YSNaNM, ZSNaNM;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE;
logic XInfM, YInfM, ZInfM;
logic XExpMaxE;
logic XNormE;
logic FDivBusyE;
logic start;
logic reset;
logic XDenorm;
logic YDenorm;
logic [63:0] AS_Result;
logic [4:0] Flags;
logic Denorm;
logic done;
logic clk;
logic [63:0] yexpected;
logic [63:0] vectornum, errors; // bookkeeping variables
logic [199:0] testvectors[50000:0]; // array of testvectors
logic [7:0] flags_expected;
integer handle3;
integer desc3;
// instantiate device under test
unpack unpack(.X(op1), .Y(op2), .Z(64'h0), .FOpCtrlE, .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0),
.XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE),
.FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags));
// current fpdivsqrt does not operation on denorms yet
assign Denorm = XDenormE | YDenormE | Flags[3];
// generate clock to sequence tests
always
begin
clk = 1; # 5; clk = 0; # 5;
end
initial
begin
handle3 = $fopen("f64_div_rne.out");
$readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors);
vectornum = 0; errors = 0;
start = 1'b0;
// reset
reset = 1; #27; reset = 0;
end
initial
begin
desc3 = handle3;
// Operation (if applicable)
#0 op_type = 1'b0;
// Precision (32-bit or 64-bit)
#0 FmtE = 1'b1;
// From fctrl logic to dictate operation
#0 FOpCtrlE = 3'b000;
// Rounding Mode
#0 FrmE = 3'b000;
// Trap masking (n/a for RISC-V)
#0 OvEn = 1'b0;
#0 UnEn = 1'b0;
end
always @(posedge clk)
begin
if (~reset)
begin
#0; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
#50 start = 1'b1;
repeat (2)
@(posedge clk);
// deassert start after 2 cycles
start = 1'b0;
repeat (10)
@(posedge clk);
$fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected));
vectornum = vectornum + 1;
if (testvectors[vectornum] === 200'bx) begin
$display("%d tests completed", vectornum);
$finish;
end
end // if (~reset)
$display("%d vectors processed", vectornum);
end // always @ (posedge clk)
endmodule // tb

View File

@ -87,7 +87,7 @@ logic [3:0] dummy;
"arch64m": if (`M_SUPPORTED) tests = arch64m;
"arch64d": if (`D_SUPPORTED) tests = arch64d;
"imperas64i": tests = imperas64i;
// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
//"imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
"imperas64f": if (`F_SUPPORTED) tests = imperas64f;
"imperas64d": if (`D_SUPPORTED) tests = imperas64d;
"imperas64m": if (`M_SUPPORTED) tests = imperas64m;
@ -110,7 +110,7 @@ logic [3:0] dummy;
"arch32m": if (`M_SUPPORTED) tests = arch32m;
"arch32f": if (`F_SUPPORTED) tests = arch32f;
"imperas32i": tests = imperas32i;
// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
//"imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
"wally32a": if (`A_SUPPORTED) tests = wally32a;
@ -183,7 +183,7 @@ logic [3:0] dummy;
// read test vectors into memory
pathname = tvpaths[tests[0].atoi()];
/* if (tests[0] == `IMPERASTEST)
/* if (tests[0] == `IMPERASTEST)
pathname = tvpaths[0];
else pathname = tvpaths[1]; */
memfilename = {pathname, tests[test], ".elf.memfile"};
@ -255,7 +255,7 @@ logic [3:0] dummy;
//if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
(signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1?
if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
// report errors unless they are garbage at the end of the sim
// kind of hacky test for garbage right now
$display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
@ -368,12 +368,12 @@ module riscvassertions;
assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 16");
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
end
endmodule
@ -409,47 +409,45 @@ module DCacheFlushFSM
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
end
integer i, j, k;
integer i, j, k;
always @(posedge clk) begin
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
end
end
end
end
end
end
end
always @(posedge clk) begin
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
end
end
end
end
end
end
end
flop #(1) doneReg(.clk, .d(start), .q(done));
endmodule