diff --git a/pipelined/regression/wally-pipelined-imperas.do b/pipelined/regression/wally-pipelined-imperas.do new file mode 100644 index 000000000..285d013fc --- /dev/null +++ b/pipelined/regression/wally-pipelined-imperas.do @@ -0,0 +1,45 @@ +# wally-pipelined.do +# +# Modification by Oklahoma State University & Harvey Mudd College +# Use with Testbench +# James Stine, 2008; David Harris 2021 +# Go Cowboys!!!!!! +# +# Takes 1:10 to run RV64IC tests using gui + +# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m" + +# Use this wally-pipelined.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do wally-pipelined.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do wally-pipelined.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +# suppress spurious warnngs about +# "Extra checking for conflicts with always_comb done at vopt time" +# because vsim will run vopt + +# start and run simulation +# remove +acc flag for faster sim during regressions if there is no need to access internal signals + # *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings. +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench_imperas.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063 +vopt +acc work.testbench -G DEBUG=1 -o workopt +vsim workopt +nowarn3829 -fatal 7 +view wave +#-- display input and output signals as hexidecimal values +add log -recursive /* +do wave.do + +run -all +noview ../testbench/testbench_imperas.sv +view wave diff --git a/pipelined/src/ifu/BTBPredictor.sv b/pipelined/src/ifu/BTBPredictor.sv index d15dae6ce..9e46858e2 100644 --- a/pipelined/src/ifu/BTBPredictor.sv +++ b/pipelined/src/ifu/BTBPredictor.sv @@ -37,13 +37,13 @@ module BTBPredictor input logic StallF, StallE, input logic [`XLEN-1:0] LookUpPC, output logic [`XLEN-1:0] TargetPC, - output logic [4:0] InstrClass, + output logic [3:0] InstrClass, output logic Valid, // update input logic UpdateEN, input logic [`XLEN-1:0] UpdatePC, input logic [`XLEN-1:0] UpdateTarget, - input logic [4:0] UpdateInstrClass, + input logic [3:0] UpdateInstrClass, input logic UpdateInvalid ); @@ -99,7 +99,7 @@ module BTBPredictor // *** need to add forwarding. // *** optimize for byte write enables - ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk), + ram2p1r1wb #(Depth, `XLEN+4) memory(.clk(clk), .reset(reset), .ra1(LookUpPCIndex), .rd1({{InstrClass, TargetPC}}), @@ -107,7 +107,7 @@ module BTBPredictor .wa2(UpdatePCIndex), .wd2({UpdateInstrClass, UpdateTarget}), .wen2(UpdateEN), - .bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right. + .bwe2({4'hF, {`XLEN{1'b1}}})); // *** definitely not right. endmodule diff --git a/pipelined/src/ifu/RAsPredictor.sv b/pipelined/src/ifu/RAsPredictor.sv index 2fb98417f..c71ff2966 100644 --- a/pipelined/src/ifu/RAsPredictor.sv +++ b/pipelined/src/ifu/RAsPredictor.sv @@ -32,14 +32,19 @@ module RASPredictor #(parameter int StackSize = 16 ) (input logic clk, - input logic reset, - input logic pop, - output logic [`XLEN-1:0] popPC, - input logic push, - input logic incr, - input logic [`XLEN-1:0] pushPC + input logic reset, + input logic PopF, + output logic [`XLEN-1:0] RASPCF, + input logic [3:0] WrongPredInstrClassD, + input logic [3:0] InstrClassD, + input logic PushE, + input logic incr, + input logic [`XLEN-1:0] PCLinkE ); + // *** need to update so it either doesn't push until the memory stage + // or need to repair flushed push. + // *** need to repair popped and then flushed returns. logic CounterEn; localparam Depth = $clog2(StackSize); @@ -47,13 +52,13 @@ module RASPredictor logic [StackSize-1:0] [`XLEN-1:0] memory; integer index; - assign CounterEn = pop | push | incr; + assign CounterEn = PopF | PushE | incr | WrongPredInstrClassD[2]; - assign PtrD = pop ? PtrM1 : PtrP1; + assign PtrD = PopF | InstrClassD[2] ? PtrM1 : PtrP1; assign PtrM1 = PtrQ - 1'b1; assign PtrP1 = PtrQ + 1'b1; - // may have to handle a push and an incr at the same time. + // may have to handle a PushE and an incr at the same time. // *** what happens if jal is executing and there is a return being flushed in Decode? flopenr #(Depth) PTR(.clk(clk), @@ -67,12 +72,12 @@ module RASPredictor if(reset) begin for(index=0; index XLEN because AHB bus width is XLEN"); + assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported"); + assert (`FLEN<=`XLEN | `DCACHE | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported"); + assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); + assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); + assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); + assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); + assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); + assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2"); + assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); + assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); + assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); + assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF"); + assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); + assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); + assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); + assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses"); + assert (`DCACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); + assert (`ICACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); + assert ((`DCACHE == 0 & `ICACHE == 0) | `BUS) else $error("Dcache and Icache requires DBUS."); + assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1"); + assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words"); + assert (`DCACHE | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally."); + assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED"); + end + + // *** DH 8/23/ +endmodule + + +/* verilator lint_on STMTDLY */ +/* verilator lint_on WIDTH */ + +module DCacheFlushFSM + (input logic clk, + input logic reset, + input logic start, + output logic done); + + genvar adr; + + logic [`XLEN-1:0] ShadowRAM[`UNCORE_RAM_BASE>>(1+`XLEN/32):(`UNCORE_RAM_RANGE+`UNCORE_RAM_BASE)>>1+(`XLEN/32)]; + + if(`DCACHE) begin + localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; + localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; + localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; + localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN; + localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN; + localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM; + +//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM; + localparam integer numwords = sramlen/`XLEN; + localparam integer lognumlines = $clog2(numlines); + localparam integer loglinebytelen = $clog2(linebytelen); + localparam integer lognumways = $clog2(numways); + localparam integer tagstart = lognumlines + loglinebytelen; + + + + genvar index, way, cacheWord; + logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [sramlen-1:0] cacheline; + logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0]; + for(index = 0; index < numlines; index++) begin + for(way = 0; way < numways; way++) begin + for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin + copyShadow #(.tagstart(tagstart), + .loglinebytelen(loglinebytelen), .sramlen(sramlen)) + copyShadow(.clk, + .start, + .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS-1-tagstart:0]), + .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), + .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), + // these dirty bit selections would be needed if dirty is moved inside the tag array. + //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]), + //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS+tagstart]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.RAM[index]), + .index(index), + .cacheWord(cacheWord), + .CacheData(CacheData[way][index][cacheWord]), + .CacheAdr(CacheAdr[way][index][cacheWord]), + .CacheTag(CacheTag[way][index][cacheWord]), + .CacheValid(CacheValid[way][index][cacheWord]), + .CacheDirty(CacheDirty[way][index][cacheWord])); + end + end + end + + integer i, j, k, l; + + always @(posedge clk) begin + if (start) begin #1 + #1 + for(i = 0; i < numlines; i++) begin + for(j = 0; j < numways; j++) begin + for(l = 0; l < cachesramwords; l++) begin + if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin + for(k = 0; k < numwords; k++) begin + //cacheline = CacheData[j][i][0]; + // does not work with modelsim + // # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions. + // see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions + //ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k]; + ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN]; + end + end + end + end + end + end + end + end + flop #(1) doneReg(.clk, .d(start), .q(done)); +endmodule + +module copyShadow + #(parameter tagstart, loglinebytelen, sramlen) + (input logic clk, + input logic start, + input logic [`PA_BITS-1:tagstart] tag, + input logic valid, dirty, + input logic [sramlen-1:0] data, + input logic [32-1:0] index, + input logic [32-1:0] cacheWord, + output logic [sramlen-1:0] CacheData, + output logic [`PA_BITS-1:0] CacheAdr, + output logic [`XLEN-1:0] CacheTag, + output logic CacheValid, + output logic CacheDirty); + + + always_ff @(posedge clk) begin + if(start) begin + CacheTag = tag; + CacheValid = valid; + CacheDirty = dirty; + CacheData = data; + CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8)); + end + end + +endmodule + +task automatic updateProgramAddrLabelArray; + input string ProgramAddrMapFile, ProgramLabelMapFile; + inout integer ProgramAddrLabelArray [string]; + // Gets the memory location of begin_signature + integer ProgramLabelMapFP, ProgramAddrMapFP; + ProgramLabelMapFP = $fopen(ProgramLabelMapFile, "r"); + ProgramAddrMapFP = $fopen(ProgramAddrMapFile, "r"); + + if (ProgramLabelMapFP & ProgramAddrMapFP) begin // check we found both files + while (!$feof(ProgramLabelMapFP)) begin + string label, adrstr; + integer returncode; + returncode = $fscanf(ProgramLabelMapFP, "%s\n", label); + returncode = $fscanf(ProgramAddrMapFP, "%s\n", adrstr); + if (ProgramAddrLabelArray.exists(label)) + ProgramAddrLabelArray[label] = adrstr.atohex(); + end + end + $fclose(ProgramLabelMapFP); + $fclose(ProgramAddrMapFP); +endtask +