This commit is contained in:
Katherine Parry 2022-05-26 20:48:30 +00:00
commit b13c3d5385
8 changed files with 102 additions and 188 deletions

@ -1 +1 @@
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86

View File

@ -1,15 +1,37 @@
# Makefile added 1/20/22 David_Harris@hmc.edu # Makefile added 1/20/22 David_Harris@hmc.edu
# Compile Embench for Wally # Compile Embench for Wally
all: Makefile all: build sim
../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cflags "-O2 -march=rv32i -mabi=ilp32 -mcmodel=medany" --cc riscv64-unknown-elf-gcc
./benchmark_size.py
./benchmark_speed.py
# view with allClean: clean all
# more `ls -t | head -1`
build:
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles"
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib" --cflags="-nostdlib" --dummy-libs="libgcc libm libc crt0"
sim: size speed
size:
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
speed:
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50
objdump:
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64.objdump
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/cubic/cubic > ../../addins/embench-iot/bd_speed/src/cubic/cubic.objdump
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/md5sum/md5sum > ../../addins/embench-iot/bd_speed/src/md5sum/md5sum.objdump
riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/statemate/statemate > ../../addins/embench-iot/bd_speed/src/statemate/statemate.objdump
clean:
rm -rf ../../addins/embench-iot/bd_speed/
rm -rf ../../addins/embench-iot/bd_size/
# std:
# ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm"
# riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
# --dummy-libs="libgcc libm libc"
# --cflags "-O2 -g -nostartfiles" # --cflags "-O2 -g -nostartfiles"
# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
# --user-libs="-lm"
#riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c # riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c

View File

@ -1,7 +0,0 @@
# Makefile added 1/20/22 David_Harris@hmc.edu
# Compile Embench for Wally
all: Makefile
./build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cc riscv64-unknown-elf-gcc
./benchmark_size.py
./benchmark_speed.py

View File

@ -46,7 +46,7 @@ configs = [
] ]
def getBuildrootTC(short): def getBuildrootTC(short):
INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
MAX_EXPECTED = 246000000 MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
if short: if short:
BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!" BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
BRgrepstr=str(INSTR_LIMIT)+" instructions" BRgrepstr=str(INSTR_LIMIT)+" instructions"

View File

@ -47,7 +47,7 @@ module srt #(parameter Nf=52) (
input logic Int, // Choose integer inputss input logic Int, // Choose integer inputss
input logic Sqrt, // perform square root, not divide input logic Sqrt, // perform square root, not divide
output logic rsign, output logic rsign,
output logic [Nf-1:0] Quot, Rem, // *** later handle integers output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
output logic [`NE-1:0] rExp, output logic [`NE-1:0] rExp,
output logic [3:0] Flags output logic [3:0] Flags
); );
@ -91,6 +91,8 @@ module srt #(parameter Nf=52) (
signcalc signcalc(.XSign, .YSign, .calcSign); signcalc signcalc(.XSign, .YSign, .calcSign);
srtpostproc postproc(rp, rm, Quot); srtpostproc postproc(rp, rm, Quot);
otfc otfc(qp, qz, qm, Quot, QuotOTFC);
endmodule endmodule
module srtpostproc #(parameter N=52) ( module srtpostproc #(parameter N=52) (
@ -210,9 +212,24 @@ module qacc #(parameter N=55) (
end */ end */
endmodule endmodule
//////////
// otfc //
//////////
module otfc #(parameter N=52) (
input logic qp, qz, qm,
input logic [N-1:0] Quot,
output logic [N-1:0] QuotOTFC
);
assign QuotOTFC = Quot;
endmodule
///////// /////////
// inv // // inv //
///////// /////////
module inv(input logic [55:0] in, module inv(input logic [55:0] in,
output logic [55:0] out); output logic [55:0] out);

View File

@ -44,7 +44,7 @@ module testbench;
logic [51:0] afrac, bfrac; logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp; logic [10:0] aExp, bExp;
logic asign, bsign; logic asign, bsign;
logic [51:0] r; logic [51:0] r, rOTFC;
logic [54:0] rp, rm; // positive quotient digits logic [54:0] rp, rm; // positive quotient digits
// Test parameters // Test parameters
@ -72,7 +72,7 @@ module testbench;
.SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcXFrac(afrac), .SrcYFrac(bfrac),
.SrcA('0), .SrcB('0), .Fmt(2'b00), .SrcA('0), .SrcB('0), .Fmt(2'b00),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
.Quot(r), .Rem(), .Flags()); .Quot(r), .QuotOTFC(rOTFC), .Rem(), .Flags());
// Counter // Counter
counter counter(clk, req, done); counter counter(clk, req, done);
@ -117,6 +117,13 @@ module testbench;
$display("failed\n"); $display("failed\n");
$stop; $stop;
end end
if (r !== rOTFC) // Check if OTFC works
begin
errors = errors+1;
$display("OTFC is %h, should be %h\n", rOTFC, r);
$display("failed/n");
$stop;
end
if (afrac === 52'hxxxxxxxxxxxxx) if (afrac === 52'hxxxxxxxxxxxxx)
begin begin
$display("%d Tests completed successfully", testnum); $display("%d Tests completed successfully", testnum);

View File

@ -1,123 +0,0 @@
// testbench
module testbench ();
logic [63:0] op1;
logic [63:0] op2;
logic [2:0] FOpCtrlE;
logic [2:0] FrmE;
logic op_type;
logic FmtE;
logic OvEn;
logic UnEn;
logic XSgnE, YSgnE, ZSgnE;
logic XSgnM, YSgnM;
logic [10:0] XExpE, YExpE, ZExpE;
logic [10:0] XExpM, YExpM, ZExpM;
logic [52:0] XManE, YManE, ZManE;
logic [52:0] XManM, YManM, ZManM;
logic [10:0] BiasE;
logic XNaNE, YNaNE, ZNaNE;
logic XNaNM, YNaNM, ZNaNM;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XSNaNM, YSNaNM, ZSNaNM;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE;
logic XInfM, YInfM, ZInfM;
logic XExpMaxE;
logic XNormE;
logic FDivBusyE;
logic start;
logic reset;
logic XDenorm;
logic YDenorm;
logic [63:0] AS_Result;
logic [4:0] Flags;
logic Denorm;
logic done;
logic clk;
logic [63:0] yexpected;
logic [63:0] vectornum, errors; // bookkeeping variables
logic [199:0] testvectors[50000:0]; // array of testvectors
logic [7:0] flags_expected;
integer handle3;
integer desc3;
// instantiate device under test
unpack unpack(.X(op1), .Y(op2), .Z(64'h0), .FOpCtrlE, .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0),
.XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE),
.FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags));
// current fpdivsqrt does not operation on denorms yet
assign Denorm = XDenormE | YDenormE | Flags[3];
// generate clock to sequence tests
always
begin
clk = 1; # 5; clk = 0; # 5;
end
initial
begin
handle3 = $fopen("f64_div_rne.out");
$readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors);
vectornum = 0; errors = 0;
start = 1'b0;
// reset
reset = 1; #27; reset = 0;
end
initial
begin
desc3 = handle3;
// Operation (if applicable)
#0 op_type = 1'b0;
// Precision (32-bit or 64-bit)
#0 FmtE = 1'b1;
// From fctrl logic to dictate operation
#0 FOpCtrlE = 3'b000;
// Rounding Mode
#0 FrmE = 3'b000;
// Trap masking (n/a for RISC-V)
#0 OvEn = 1'b0;
#0 UnEn = 1'b0;
end
always @(posedge clk)
begin
if (~reset)
begin
#0; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
#50 start = 1'b1;
repeat (2)
@(posedge clk);
// deassert start after 2 cycles
start = 1'b0;
repeat (10)
@(posedge clk);
$fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected));
vectornum = vectornum + 1;
if (testvectors[vectornum] === 200'bx) begin
$display("%d tests completed", vectornum);
$finish;
end
end // if (~reset)
$display("%d vectors processed", vectornum);
end // always @ (posedge clk)
endmodule // tb

View File

@ -87,7 +87,7 @@ logic [3:0] dummy;
"arch64m": if (`M_SUPPORTED) tests = arch64m; "arch64m": if (`M_SUPPORTED) tests = arch64m;
"arch64d": if (`D_SUPPORTED) tests = arch64d; "arch64d": if (`D_SUPPORTED) tests = arch64d;
"imperas64i": tests = imperas64i; "imperas64i": tests = imperas64i;
// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu; //"imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
"imperas64f": if (`F_SUPPORTED) tests = imperas64f; "imperas64f": if (`F_SUPPORTED) tests = imperas64f;
"imperas64d": if (`D_SUPPORTED) tests = imperas64d; "imperas64d": if (`D_SUPPORTED) tests = imperas64d;
"imperas64m": if (`M_SUPPORTED) tests = imperas64m; "imperas64m": if (`M_SUPPORTED) tests = imperas64m;
@ -110,7 +110,7 @@ logic [3:0] dummy;
"arch32m": if (`M_SUPPORTED) tests = arch32m; "arch32m": if (`M_SUPPORTED) tests = arch32m;
"arch32f": if (`F_SUPPORTED) tests = arch32f; "arch32f": if (`F_SUPPORTED) tests = arch32f;
"imperas32i": tests = imperas32i; "imperas32i": tests = imperas32i;
// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu; //"imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f; "imperas32f": if (`F_SUPPORTED) tests = imperas32f;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m; "imperas32m": if (`M_SUPPORTED) tests = imperas32m;
"wally32a": if (`A_SUPPORTED) tests = wally32a; "wally32a": if (`A_SUPPORTED) tests = wally32a;
@ -183,7 +183,7 @@ logic [3:0] dummy;
// read test vectors into memory // read test vectors into memory
pathname = tvpaths[tests[0].atoi()]; pathname = tvpaths[tests[0].atoi()];
/* if (tests[0] == `IMPERASTEST) /* if (tests[0] == `IMPERASTEST)
pathname = tvpaths[0]; pathname = tvpaths[0];
else pathname = tvpaths[1]; */ else pathname = tvpaths[1]; */
memfilename = {pathname, tests[test], ".elf.memfile"}; memfilename = {pathname, tests[test], ".elf.memfile"};
@ -255,7 +255,7 @@ logic [3:0] dummy;
//if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] & //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
(signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1? (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1?
if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
// report errors unless they are garbage at the end of the sim // report errors unless they are garbage at the end of the sim
// kind of hacky test for garbage right now // kind of hacky test for garbage right now
$display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx); $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
@ -368,12 +368,12 @@ module riscvassertions;
assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM"); // assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS."); //assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS."); //assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 16"); assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
end end
endmodule endmodule
@ -409,47 +409,45 @@ module DCacheFlushFSM
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
for(index = 0; index < numlines; index++) begin for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart), copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen)) .loglinebytelen(loglinebytelen))
copyShadow(.clk, copyShadow(.clk,
.start, .start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
.index(index), .index(index),
.cacheWord(cacheWord), .cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]), .CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]), .CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]), .CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]), .CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord])); .CacheDirty(CacheDirty[way][index][cacheWord]));
end end
end
end end
end
integer i, j, k; integer i, j, k;
always @(posedge clk) begin always @(posedge clk) begin
if (start) begin #1 if (start) begin #1
#1 #1
for(i = 0; i < numlines; i++) begin for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
end end
end end
end end
end end
end end
end end
end
end
flop #(1) doneReg(.clk, .d(start), .q(done)); flop #(1) doneReg(.clk, .d(start), .q(done));
endmodule endmodule