From 5e875067721ffd3a7c4256ad4274f4662212b4d4 Mon Sep 17 00:00:00 2001 From: DTowersM Date: Thu, 26 May 2022 00:08:18 +0000 Subject: [PATCH 1/5] working makefile for embench and removed testbench-f64 --- benchmarks/embench/Makefile | 40 +++++++-- benchmarks/embench/Makefile~ | 7 -- pipelined/testbench/testbench-f64.sv | 123 --------------------------- 3 files changed, 31 insertions(+), 139 deletions(-) delete mode 100644 benchmarks/embench/Makefile~ delete mode 100755 pipelined/testbench/testbench-f64.sv diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index e26ed416d..3e40a68ab 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -1,15 +1,37 @@ # Makefile added 1/20/22 David_Harris@hmc.edu # Compile Embench for Wally -all: Makefile - ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cflags "-O2 -march=rv32i -mabi=ilp32 -mcmodel=medany" --cc riscv64-unknown-elf-gcc - ./benchmark_size.py - ./benchmark_speed.py +all: build sim -# view with -# more `ls -t | head -1` +allClean: clean all +build: + ../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" + ../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib" --cflags="-nostdlib" --dummy-libs="libgcc libm libc crt0" + +sim: size speed + +size: + ../../addins/embench-iot/benchmark_size.py --builddir=bd_size + +speed: + ../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50 + +objdump: + riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_speed/src/aha-mont64/aha-mont64.objdump + riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/cubic/cubic > ../../addins/embench-iot/bd_speed/src/cubic/cubic.objdump + riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/md5sum/md5sum > ../../addins/embench-iot/bd_speed/src/md5sum/md5sum.objdump + riscv64-unknown-elf-objdump -S ../../addins/embench-iot/bd_speed/src/statemate/statemate > ../../addins/embench-iot/bd_speed/src/statemate/statemate.objdump + +clean: + rm -rf ../../addins/embench-iot/bd_speed/ + rm -rf ../../addins/embench-iot/bd_size/ + +# std: +# ../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm" +# riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump +# --dummy-libs="libgcc libm libc" # --cflags "-O2 -g -nostartfiles" - - -#riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c +# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc" +# --user-libs="-lm" +# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c diff --git a/benchmarks/embench/Makefile~ b/benchmarks/embench/Makefile~ deleted file mode 100644 index ebd9a7e44..000000000 --- a/benchmarks/embench/Makefile~ +++ /dev/null @@ -1,7 +0,0 @@ -# Makefile added 1/20/22 David_Harris@hmc.edu -# Compile Embench for Wally - -all: Makefile - ./build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cc riscv64-unknown-elf-gcc - ./benchmark_size.py - ./benchmark_speed.py diff --git a/pipelined/testbench/testbench-f64.sv b/pipelined/testbench/testbench-f64.sv deleted file mode 100755 index a0c7e6a31..000000000 --- a/pipelined/testbench/testbench-f64.sv +++ /dev/null @@ -1,123 +0,0 @@ -// testbench -module testbench (); - - logic [63:0] op1; - logic [63:0] op2; - logic [2:0] FOpCtrlE; - logic [2:0] FrmE; - logic op_type; - logic FmtE; - logic OvEn; - logic UnEn; - - logic XSgnE, YSgnE, ZSgnE; - logic XSgnM, YSgnM; - logic [10:0] XExpE, YExpE, ZExpE; - logic [10:0] XExpM, YExpM, ZExpM; - logic [52:0] XManE, YManE, ZManE; - logic [52:0] XManM, YManM, ZManM; - - logic [10:0] BiasE; - logic XNaNE, YNaNE, ZNaNE; - logic XNaNM, YNaNM, ZNaNM; - logic XSNaNE, YSNaNE, ZSNaNE; - logic XSNaNM, YSNaNM, ZSNaNM; - logic XDenormE, YDenormE, ZDenormE; - logic XZeroE, YZeroE, ZZeroE; - logic XZeroM, YZeroM, ZZeroM; - logic XInfE, YInfE, ZInfE; - logic XInfM, YInfM, ZInfM; - logic XExpMaxE; - logic XNormE; - logic FDivBusyE; - - logic start; - logic reset; - - logic XDenorm; - logic YDenorm; - logic [63:0] AS_Result; - logic [4:0] Flags; - logic Denorm; - logic done; - - logic clk; - logic [63:0] yexpected; - logic [63:0] vectornum, errors; // bookkeeping variables - logic [199:0] testvectors[50000:0]; // array of testvectors - logic [7:0] flags_expected; - - integer handle3; - integer desc3; - - // instantiate device under test - unpack unpack(.X(op1), .Y(op2), .Z(64'h0), .FOpCtrlE, .FmtE, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); - fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), - .reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0), - .XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE), - .FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags)); - - - // current fpdivsqrt does not operation on denorms yet - assign Denorm = XDenormE | YDenormE | Flags[3]; - - // generate clock to sequence tests - always - begin - clk = 1; # 5; clk = 0; # 5; - end - - initial - begin - handle3 = $fopen("f64_div_rne.out"); - $readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors); - vectornum = 0; errors = 0; - start = 1'b0; - // reset - reset = 1; #27; reset = 0; - end - - initial - begin - desc3 = handle3; - // Operation (if applicable) - #0 op_type = 1'b0; - // Precision (32-bit or 64-bit) - #0 FmtE = 1'b1; - // From fctrl logic to dictate operation - #0 FOpCtrlE = 3'b000; - // Rounding Mode - #0 FrmE = 3'b000; - // Trap masking (n/a for RISC-V) - #0 OvEn = 1'b0; - #0 UnEn = 1'b0; - end - - always @(posedge clk) - begin - if (~reset) - begin - #0; {op1, op2, yexpected, flags_expected} = testvectors[vectornum]; - #50 start = 1'b1; - repeat (2) - @(posedge clk); - // deassert start after 2 cycles - start = 1'b0; - repeat (10) - @(posedge clk); - $fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected)); - vectornum = vectornum + 1; - if (testvectors[vectornum] === 200'bx) begin - $display("%d tests completed", vectornum); - $finish; - end - end // if (~reset) - $display("%d vectors processed", vectornum); - end // always @ (posedge clk) - -endmodule // tb - - From 5a9e3a852a6325542909fa0e297dcfe19f126993 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Wed, 25 May 2022 17:10:59 -0700 Subject: [PATCH 2/5] see commit 9042cc3c --- pipelined/testbench/testbench.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index b40fcf365..86760487a 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -373,7 +373,7 @@ module riscvassertions; assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); //assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS."); //assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS."); - assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 16"); + assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); end endmodule From 466fb71addf85c3891e4bab638e2ada6d0190e33 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Wed, 25 May 2022 17:40:57 -0700 Subject: [PATCH 3/5] added a todo to riscv-wally so that long buildroot looks for a successful boot rather than a specific instruction --- pipelined/regression/regression-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 3daadf769..664f99648 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -46,7 +46,7 @@ configs = [ ] def getBuildrootTC(short): INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM - MAX_EXPECTED = 246000000 + MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt. if short: BRcmd="vsim > {} -c < Date: Thu, 26 May 2022 16:45:28 +0000 Subject: [PATCH 4/5] Set up the divider for on-the-fly conversion --- addins/riscv-arch-test | 2 +- pipelined/srt/srt.sv | 19 ++++++++++++++++++- pipelined/srt/testbench.sv | 11 +++++++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index be67c99bd..307c77b26 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 +Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index bbcabccb9..c5e837180 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -47,7 +47,7 @@ module srt #(parameter Nf=52) ( input logic Int, // Choose integer inputss input logic Sqrt, // perform square root, not divide output logic rsign, - output logic [Nf-1:0] Quot, Rem, // *** later handle integers + output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers output logic [`NE-1:0] rExp, output logic [3:0] Flags ); @@ -91,6 +91,8 @@ module srt #(parameter Nf=52) ( signcalc signcalc(.XSign, .YSign, .calcSign); srtpostproc postproc(rp, rm, Quot); + + otfc otfc(qp, qz, qm, Quot, QuotOTFC); endmodule module srtpostproc #(parameter N=52) ( @@ -210,9 +212,24 @@ module qacc #(parameter N=55) ( end */ endmodule +////////// +// otfc // +////////// + +module otfc #(parameter N=52) ( + input logic qp, qz, qm, + input logic [N-1:0] Quot, + output logic [N-1:0] QuotOTFC +); + + assign QuotOTFC = Quot; + +endmodule + ///////// // inv // ///////// + module inv(input logic [55:0] in, output logic [55:0] out); diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 08b5d0d79..022fa845f 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -44,7 +44,7 @@ module testbench; logic [51:0] afrac, bfrac; logic [10:0] aExp, bExp; logic asign, bsign; - logic [51:0] r; + logic [51:0] r, rOTFC; logic [54:0] rp, rm; // positive quotient digits // Test parameters @@ -72,7 +72,7 @@ module testbench; .SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcA('0), .SrcB('0), .Fmt(2'b00), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), - .Quot(r), .Rem(), .Flags()); + .Quot(r), .QuotOTFC(rOTFC), .Rem(), .Flags()); // Counter counter counter(clk, req, done); @@ -117,6 +117,13 @@ module testbench; $display("failed\n"); $stop; end + if (r !== rOTFC) // Check if OTFC works + begin + errors = errors+1; + $display("OTFC is %h, should be %h\n", rOTFC, r); + $display("failed/n"); + $stop; + end if (afrac === 52'hxxxxxxxxxxxxx) begin $display("%d Tests completed successfully", testnum); From a983791d6403bf2d32a54dbd96e77ec9f6f8296e Mon Sep 17 00:00:00 2001 From: DTowersM Date: Thu, 26 May 2022 19:04:21 +0000 Subject: [PATCH 5/5] fixed indent spacing (cosmetic change) --- pipelined/testbench/testbench.sv | 84 ++++++++++++++++---------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 86760487a..fb338848c 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -87,7 +87,7 @@ logic [3:0] dummy; "arch64m": if (`M_SUPPORTED) tests = arch64m; "arch64d": if (`D_SUPPORTED) tests = arch64d; "imperas64i": tests = imperas64i; -// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu; + //"imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu; "imperas64f": if (`F_SUPPORTED) tests = imperas64f; "imperas64d": if (`D_SUPPORTED) tests = imperas64d; "imperas64m": if (`M_SUPPORTED) tests = imperas64m; @@ -110,7 +110,7 @@ logic [3:0] dummy; "arch32m": if (`M_SUPPORTED) tests = arch32m; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; -// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu; + //"imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; @@ -183,7 +183,7 @@ logic [3:0] dummy; // read test vectors into memory pathname = tvpaths[tests[0].atoi()]; -/* if (tests[0] == `IMPERASTEST) + /* if (tests[0] == `IMPERASTEST) pathname = tvpaths[0]; else pathname = tvpaths[1]; */ memfilename = {pathname, tests[test], ".elf.memfile"}; @@ -255,7 +255,7 @@ logic [3:0] dummy; //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] & (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1? if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin -// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin + // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin // report errors unless they are garbage at the end of the sim // kind of hacky test for garbage right now $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx); @@ -368,7 +368,7 @@ module riscvassertions; assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); -// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM"); + // assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM"); assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); //assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS."); @@ -409,47 +409,45 @@ module DCacheFlushFSM logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; - for(index = 0; index < numlines; index++) begin - for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin - copyShadow #(.tagstart(tagstart), - .loglinebytelen(loglinebytelen)) - copyShadow(.clk, - .start, - .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), - .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), - .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), - .index(index), - .cacheWord(cacheWord), - .CacheData(CacheData[way][index][cacheWord]), - .CacheAdr(CacheAdr[way][index][cacheWord]), - .CacheTag(CacheTag[way][index][cacheWord]), - .CacheValid(CacheValid[way][index][cacheWord]), - .CacheDirty(CacheDirty[way][index][cacheWord])); - end - end + for(index = 0; index < numlines; index++) begin + for(way = 0; way < numways; way++) begin + for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin + copyShadow #(.tagstart(tagstart), + .loglinebytelen(loglinebytelen)) + copyShadow(.clk, + .start, + .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), + .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), + .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), + .index(index), + .cacheWord(cacheWord), + .CacheData(CacheData[way][index][cacheWord]), + .CacheAdr(CacheAdr[way][index][cacheWord]), + .CacheTag(CacheTag[way][index][cacheWord]), + .CacheValid(CacheValid[way][index][cacheWord]), + .CacheDirty(CacheDirty[way][index][cacheWord])); + end end + end - integer i, j, k; + integer i, j, k; - always @(posedge clk) begin - if (start) begin #1 - #1 - for(i = 0; i < numlines; i++) begin - for(j = 0; j < numways; j++) begin - for(k = 0; k < numwords; k++) begin - if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin - ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; - end - end - end - end - end - end - - - end + always @(posedge clk) begin + if (start) begin #1 + #1 + for(i = 0; i < numlines; i++) begin + for(j = 0; j < numways; j++) begin + for(k = 0; k < numwords; k++) begin + if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin + ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; + end + end + end + end + end + end + end flop #(1) doneReg(.clk, .d(start), .q(done)); endmodule