diff --git a/.gitignore b/.gitignore index 6e9920603..630731b23 100644 --- a/.gitignore +++ b/.gitignore @@ -112,4 +112,6 @@ sim/results-error/ sim/test1.rep sim/vsim.log tests/coverage/*.elf -*.elf.memfile \ No newline at end of file +*.elf.memfile +sim/*Cache.log +sim/branch \ No newline at end of file diff --git a/bin/CacheSim.py b/bin/CacheSim.py new file mode 100755 index 000000000..5669b35c1 --- /dev/null +++ b/bin/CacheSim.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 + +########################################### +## CacheSim.py +## +## Written: lserafini@hmc.edu +## Created: 27 March 2023 +## Modified: 5 April 2023 +## +## Purpose: Simulate a L1 D$ or I$ for comparison with Wally +## +## A component of the CORE-V-WALLY configurable RISC-V project. +## +## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +## +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +## +## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +## except in compliance with the License, or, at your option, the Apache License version 2.0. You +## may obtain a copy of the License at +## +## https://solderpad.org/licenses/SHL-2.1/ +## +## Unless required by applicable law or agreed to in writing, any work distributed under the +## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +## either express or implied. See the License for the specific language governing permissions +## and limitations under the License. +################################################################################################ + +# how to invoke this simulator: +# CacheSim.py NUMLINES NUMWAYS ADDRLEN TAGLEN -f LOGFILE (-v) +# so the default invocation for rv64gc is 'CacheSim.py 64 4 56 44 -f LOGFILE' +# the log files to run this simulator on can be generated from testbench.sv +# by setting I_CACHE_ADDR_LOGGER and/or D_CACHE_ADDR_LOGGER to 1 before running tests. +# I (Lim) recommend logging a single set of tests (such as wally64priv) at a time. 
+# This helps avoid unexpected logger behavior. +# With verbose mode off, the simulator only reports mismatches between its and Wally's behavior. +# With verbose mode on, the simulator logs each access into the cache. + +import sys +import math +import argparse +import os + +class CacheLine: + def __init__(self): + self.tag = 0 + self.valid = False + self.dirty = False + + def __str__(self): + string = "(V: " + str(self.valid) + ", D: " + str(self.dirty) + string += ", Tag: " + str(hex(self.tag)) + ")" + return string + + def __repr__(self): + return self.__str__() + +class Cache: + def __init__(self, numsets, numways, addrlen, taglen): + self.numways = numways + self.numsets = numsets + + self.addrlen = addrlen + self.taglen = taglen + self.setlen = int(math.log(numsets, 2)) + self.offsetlen = self.addrlen - self.taglen - self.setlen + + self.ways = [] + for i in range(numways): + self.ways.append([]) + for j in range(numsets): + self.ways[i].append(CacheLine()) + + self.pLRU = [] + for i in range(self.numsets): + self.pLRU.append([0]*(self.numways-1)) + + # flushes the cache by setting all dirty bits to False + def flush(self): + for way in self.ways: + for line in way: + line.dirty = False + + # invalidates the cache by setting all valid bits to False + def invalidate(self): + for way in self.ways: + for line in way: + line.valid = False + + # resets the pLRU to a fresh 2-D array of 0s + def clear_pLRU(self): + self.pLRU = [] + for i in range(self.numsets): + self.pLRU.append([0]*(self.numways-1)) + + # splits the given address into tag, set, and offset + def splitaddr(self, addr): + # no need for offset in the sim, but it's here for debug + tag = addr >> (self.setlen + self.offsetlen) & int('1'*self.taglen, 2) + setnum = (addr >> self.offsetlen) & int('1'*self.setlen, 2) + offset = addr & int('1'*self.offsetlen, 2) + return tag, setnum, offset + + # performs a cache access with the given address. 
+ # returns a character representing the outcome: + # H/M/E/D - hit, miss, eviction, or eviction with writeback + def cacheaccess(self, addr, write=False): + tag, setnum, _ = self.splitaddr(addr) + + # check our ways to see if we have a hit + for waynum in range(self.numways): + line = self.ways[waynum][setnum] + if line.tag == tag and line.valid: + line.dirty = line.dirty or write + self.update_pLRU(waynum, setnum) + return 'H' + + # we didn't hit, but we may not need to evict. + # check for an empty way line. + for waynum in range(self.numways): + line = self.ways[waynum][setnum] + if not line.valid: + line.tag = tag + line.valid = True + line.dirty = write + self.update_pLRU(waynum, setnum) + return 'M' + + # we need to evict. Select a victim and overwrite. + victim = self.getvictimway(setnum) + line = self.ways[victim][setnum] + prevdirty = line.dirty + line.tag = tag + line.valid = True # technically redundant + line.dirty = write + self.update_pLRU(victim, setnum) + return 'D' if prevdirty else 'E' + + # updates the pseudo-LRU tree for the given set + # with an access to the given way + def update_pLRU(self, waynum, setnum): + if self.numways == 1: + return + + tree = self.pLRU[setnum] + bottomrow = (self.numways - 1)//2 + index = (waynum // 2) + bottomrow + tree[index] = int(not (waynum % 2)) + while index > 0: + parent = (index-1) // 2 + tree[parent] = index % 2 + index = parent + + # uses the pseudo-LRU tree to select + # a victim way from the given set + # returns the victim way as an integer + def getvictimway(self, setnum): + if self.numways == 1: + return 0 + + tree = self.pLRU[setnum] + index = 0 + bottomrow = (self.numways - 1) // 2 #first index on the bottom row of the tree + while index < bottomrow: + if tree[index] == 0: + # Go to the left child + index = index*2 + 1 + else: #tree[index] == 1 + # Go to the right child + index = index*2 + 2 + + victim = (index - bottomrow)*2 + if tree[index] == 1: + victim += 1 + + return victim + + def 
__str__(self): + string = "" + for i in range(self.numways): + string += "Way " + str(i) + ": " + for line in self.ways[i]: + string += str(line) + ", " + string += "\n\n" + return string + + def __repr__(self): + return self.__str__() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Simulates a L1 cache.") + parser.add_argument('numlines', type=int, help="The number of lines per way (a power of 2)", metavar="L") + parser.add_argument('numways', type=int, help="The number of ways (a power of 2)", metavar='W') + parser.add_argument('addrlen', type=int, help="Length of the address in bits (a power of 2)", metavar="A") + parser.add_argument('taglen', type=int, help="Length of the tag in bits", metavar="T") + parser.add_argument('-f', "--file", required=True, help="Log file to simulate from") + parser.add_argument('-v', "--verbose", action='store_true', help="verbose/full-trace mode") + + args = parser.parse_args() + cache = Cache(args.numlines, args.numways, args.addrlen, args.taglen) + #numtests = -1 + extfile = os.path.expanduser(args.file) + with open(extfile, "r") as f: + for ln in f: + ln = ln.strip() + lninfo = ln.split() + if len(lninfo) < 3: #non-address line + if len(lninfo) > 0 and (lninfo[0] == 'BEGIN' or lninfo[0] == 'TRAIN'): + # currently BEGIN and END traces aren't being recorded correctly + # trying TRAIN clears instead + cache.invalidate() # a new test is starting, so 'empty' the cache + cache.clear_pLRU() + #numtests +=1 + if args.verbose: + print("New Test") + + else: + if lninfo[1] == 'F': + cache.flush() + if args.verbose: + print("F") + elif lninfo[1] == 'I': + cache.invalidate() + if args.verbose: + print("I") + else: + addr = int(lninfo[0], 16) + iswrite = lninfo[1] == 'W' or lninfo[1] == 'A' + result = cache.cacheaccess(addr, iswrite) + if args.verbose: + tag, setnum, offset = cache.splitaddr(addr) + print(hex(addr), hex(tag), hex(setnum), hex(offset), lninfo[2], result) + if not result == lninfo[2]: + 
print("Result mismatch at address", lninfo[0], ". Wally:", lninfo[2],", Sim:", result) #, "in test", numtests) + + + + + diff --git a/dvtestplan.md b/dvtestplan.md new file mode 100644 index 000000000..3b469e3b2 --- /dev/null +++ b/dvtestplan.md @@ -0,0 +1,30 @@ +# core-v-wally Design Verification Test Plan + +This document outlines the test plan for the Wally rv64gc configuration to reach Technology Readiness Level 5. + +1. Pass riscv-arch-test +2. Boot Linux +3. FPU pass all TestFloat vectors +4. Performance verification: Caches and branch predictor miss rates match independent simulation +5. Directed tests + - Privileged unit: Chapter 5 test plan + - MMU: PMA, PMP, virtual memory: Chapter 8 test plan + - Peripherals: Chapter 16 test plan +6. Random tests + - riscdv tests +7. Coverage tests + - Directed tests to bring coverage up to 100%. + - Statement, expression, branch, condition, FSM coverage in Questa + - Do not measure toggle coverage + +All tests operate correctly in lock-step with ImperasDV + +Open questions: +1. How to define extent of riscdv random tests needed? +2. What other directed tests? + PMP Tests + Virtual Memory Tests + How to define pipeline tests? + Simple ones like use after load stall are not important. 
+ Hard ones such as page table walker fault during data access while I$ access is pending are hard to articulate and code + Is there an example of a good directed pipeline test plan & implementation diff --git a/sim/imperas.ic b/sim/imperas.ic index 4c221f2af..2c1225760 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -10,6 +10,10 @@ --override cpu/mimpid=0x100 --override refRoot/cpu/tvec_align=64 +# bit manipulation +--override cpu/add_implicit_Extensions=B +--override cpu/bitmanip_version=1.0.0 + # clarify #--override refRoot/cpu/mtvec_sext=F diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 56044384b..c01c714b1 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -76,7 +76,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE logic [1:0] AdrSelMuxSel; logic [SETLEN-1:0] CacheSet; logic [LINELEN-1:0] LineWriteData; - logic ClearValid, ClearDirty, SetDirty, SetValid; + logic ClearDirty, SetDirty, SetValid; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] HitWay, ValidWay; logic CacheHit; @@ -116,7 +116,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, + .SetValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches @@ -188,19 +188,25 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Flush logic ///////////////////////////////////////////////////////////////////////////////////////////// - // Flush address (line number) - assign ResetOrFlushCntRst = reset 
| FlushCntRst; - flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr); - mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr); - assign FlushAdrP1 = NextFlushAdr + 1'b1; - assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]); - - // Flush way - flopenl #(NUMWAYS) FlushWayReg(clk, FlushWayCntEn, ResetOrFlushCntRst, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay); - if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; - else assign NextFlushWay = FlushWay[NUMWAYS-1]; - assign FlushWayFlag = FlushWay[NUMWAYS-1]; + if (!READ_ONLY_CACHE) begin:flushlogic + // Flush address (line number) + assign ResetOrFlushCntRst = reset | FlushCntRst; + flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr); + mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr); + assign FlushAdrP1 = NextFlushAdr + 1'b1; + assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]); + // Flush way + flopenl #(NUMWAYS) FlushWayReg(clk, FlushWayCntEn, ResetOrFlushCntRst, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay); + if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; + else assign NextFlushWay = FlushWay[NUMWAYS-1]; + assign FlushWayFlag = FlushWay[NUMWAYS-1]; + end // block: flushlogic + else begin:flushlogic + assign FlushWayFlag = 0; + assign FlushAdrFlag = 0; + end + ///////////////////////////////////////////////////////////////////////////////////////////// // Cache FSM ///////////////////////////////////////////////////////////////////////////////////////////// @@ -209,7 +215,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE .FlushStage, .CacheRW, .CacheAtomic, .Stall, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, - .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush, + 
.ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CacheEn, .LRUWriteEn); diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 780807943..1e7101365 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -67,11 +67,15 @@ module cacheLRU assign AllValid = &ValidWay; ///// Update replacement bits. + + // coverage off + // Excluded from coverage b/c it is untestable without varying NUMWAYS. function integer log2 (integer value); for (log2=0; value>0; log2=log2+1) value = value>>1; return log2; endfunction // log2 + // coverage on // On a miss we need to ignore HitWay and derive the new replacement bits with the VictimWay. mux2 #(NUMWAYS) WayMux(HitWay, VictimWay, SetValid, Way); diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index cd1d43c55..d1d54097e 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -55,7 +55,6 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( input logic FlushAdrFlag, // On last set of a cache flush input logic FlushWayFlag, // On the last way for any set of a cache flush output logic SelAdr, // [0] SRAM reads from NextAdr, [1] SRAM reads from PAdr - output logic ClearValid, // Clear the valid bit in the selected way and set output logic SetValid, // Set the dirty bit in the selected way and set output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic SetDirty, // Set the dirty bit in the selected way and set @@ -146,7 +145,6 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( assign SetValid = CurrState == STATE_WRITE_LINE; assign SetDirty = (CurrState == STATE_READY & AnyUpdateHit) | (CurrState == STATE_WRITE_LINE & (StoreAMO)); - assign ClearValid = '0; assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(StoreAMO)) | (CurrState == STATE_FLUSH & LineDirty); // This is wrong in a multicore snoop cache protocal. 
Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index d7cc0792d..174b82c59 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -38,8 +38,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr input logic [`PA_BITS-1:0] PAdr, // Physical address input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only) - input logic SetValid, // Set the dirty bit in the selected way and set - input logic ClearValid, // Clear the valid bit in the selected way and set + input logic SetValid, // Set the valid bit in the selected way and set input logic SetDirty, // Set the dirty bit in the selected way and set input logic ClearDirty, // Clear the dirty bit in the selected way and set input logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback @@ -71,22 +70,26 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, logic [LINELEN/8-1:0] FinalByteMask; logic SetValidEN; logic SetValidWay; - logic ClearValidWay; logic SetDirtyWay; logic ClearDirtyWay; logic SelNonHit; logic SelData; - logic FlushWayEn, VictimWayEn; - // FlushWay and VictimWay are part of a one hot way selection. Must clear them if FlushWay not selected - // or VictimWay not selected. 
- assign FlushWayEn = FlushWay & SelFlush; - assign VictimWayEn = VictimWay & SelWriteback; - - assign SelNonHit = FlushWayEn | SetValid | SelWriteback; - - mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag); - + + if (!READ_ONLY_CACHE) begin:flushlogic + logic FlushWayEn; + + mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag); + + // FlushWay is part of a one hot way selection. Must clear it if FlushWay not selected. + assign FlushWayEn = FlushWay & SelFlush; + assign SelNonHit = FlushWayEn | SetValid | SelWriteback; + end + else begin:flushlogic // no flush operation for read-only caches. + assign SelTag = VictimWay; + assign SelNonHit = SetValid; + end + mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData); ///////////////////////////////////////////////////////////////////////////////////////////// @@ -94,12 +97,16 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, ///////////////////////////////////////////////////////////////////////////////////////////// assign SetValidWay = SetValid & SelData; - assign ClearValidWay = ClearValid & SelData; - assign SetDirtyWay = SetDirty & SelData; assign ClearDirtyWay = ClearDirty & SelData; - + if (!READ_ONLY_CACHE) begin + assign SetDirtyWay = SetDirty & SelData; + assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage; + end + else begin + assign SelectedWriteWordEn = SetValidWay & ~FlushStage; + end + // If writing the whole line set all write enables to 1, else only set the correct word. - assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage; assign FinalByteMask = SetValidWay ? 
'1 : LineByteMask; // OR assign SetValidEN = SetValidWay & ~FlushStage; @@ -107,8 +114,8 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, // Tag Array ///////////////////////////////////////////////////////////////////////////////////////////// - ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn), - .addr(CacheSet), .dout(ReadTag), .bwe('1), + ram1p1rwe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn), + .addr(CacheSet), .dout(ReadTag), .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); // AND portion of distributed tag multiplexer @@ -128,10 +135,18 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, localparam LOGNUMSRAM = $clog2(NUMSRAM); for(words = 0; words < NUMSRAM; words++) begin: word - ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet), + if (!READ_ONLY_CACHE) begin:wordram + ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet), .dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), .din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), .we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); + end + else begin:wordram // no byte-enable needed for i$. 
+ ram1p1rwe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet), + .dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), + .din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), + .we(SelectedWriteWordEn)); + end end // AND portion of distributed read multiplexers @@ -146,7 +161,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, if(CacheEn) begin ValidWay <= #1 ValidBits[CacheSet]; if(InvalidateCache) ValidBits <= #1 '0; - else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CacheSet] <= #1 SetValidWay; + else if (SetValidEN) ValidBits[CacheSet] <= #1 SetValidWay; end end diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 9a69085fd..a00d82663 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -54,47 +54,67 @@ module fdivsqrtpreproc ( logic [`DIVb:0] PreSqrtX; logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) - logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator + logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division + logic signedDiv; // signed division + logic NegQuotE; // Integer quotient is negative + logic AsE, BsE; // Signs of integer inputs + logic [`XLEN-1:0] AE; // input A after W64 adjustment if (`IDIV_ON_FPU) begin:intpreproc // Int Supported - logic signedDiv, NegQuotE; - logic AsBit, BsBit, AsE, BsE, ALTBE; - logic [`XLEN-1:0] AE, BE, PosA, PosB; - logic [`DIVBLEN:0] ZeroDiff, p; + logic [`XLEN-1:0] BE, PosA, PosB; // Extract inputs, signs, zero, depending on W64 mode if applicable assign signedDiv = ~Funct3E[0]; - assign NegQuotE = AsE ^ BsE; // Quotient is negative - + // 
Source handling if (`XLEN==64) begin // 64-bit, supports W64 - mux2 #(1) azeromux(~(|ForwardedSrcAE), ~(|ForwardedSrcAE[31:0]), W64E, AZeroE); - mux2 #(1) bzeromux(~(|ForwardedSrcBE), ~(|ForwardedSrcBE[31:0]), W64E, BZeroE); - mux2 #(1) abitmux(ForwardedSrcAE[63], ForwardedSrcAE[31], W64E, AsBit); - mux2 #(1) bbitmux(ForwardedSrcBE[63], ForwardedSrcBE[31], W64E, BsBit); - mux2 #(64) amux(ForwardedSrcAE, {{(`XLEN-32){AsE}}, ForwardedSrcAE[31:0]}, W64E, AE); - mux2 #(64) bmux(ForwardedSrcBE, {{(`XLEN-32){BsE}}, ForwardedSrcBE[31:0]}, W64E, BE); - assign AsE = signedDiv & AsBit; - assign BsE = signedDiv & BsBit; + mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE); + mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE); end else begin // 32 bits only - assign AsE = signedDiv & ForwardedSrcAE[31]; - assign BsE = signedDiv & ForwardedSrcBE[31]; assign AE = ForwardedSrcAE; assign BE = ForwardedSrcBE; - assign AZeroE = ~(|ForwardedSrcAE); - assign BZeroE = ~(|ForwardedSrcBE); - end + end + assign AZeroE = ~(|AE); + assign BZeroE = ~(|BE); + assign AsE = AE[`XLEN-1] & signedDiv; + assign BsE = BE[`XLEN-1] & signedDiv; + assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative // Force integer inputs to be postiive mux2 #(`XLEN) posamux(AE, -AE, AsE, PosA); mux2 #(`XLEN) posbmux(BE, -BE, BsE, PosB); // Select integer or floating point inputs - mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFNormLenX); - mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFNormLenD); + mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX); + mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD); + + + end else begin // Int not supported + assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}}; + assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}}; 
+ end + + // count leading zeros for Subnorm FP and to normalize integer inputs + lzc #(`DIVb) lzcX (IFX, ell); + lzc #(`DIVb) lzcY (IFD, mE); + + // Normalization shift + assign XPreproc = IFX << (ell + {{`DIVBLEN{1'b0}}, 1'b1}); // *** try to remove this +1 + assign DPreproc = IFD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); + + // append leading 1 (for normal inputs) + // shift square root to be in range [1/4, 1) + // Normalized numbers are shifted right by 1 if the exponent is odd + // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + + if (`IDIV_ON_FPU) begin:intrightshift // Int Supported + logic [`DIVBLEN:0] ZeroDiff, p; + logic ALTBE; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros @@ -133,34 +153,16 @@ module fdivsqrtpreproc ( flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); + flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); if (`XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); - - end else begin // Int not supported - assign IFNormLenX = {Xm, {(`DIVb-`NF-1){1'b0}}}; - assign IFNormLenD = {Ym, {(`DIVb-`NF-1){1'b0}}}; + end else begin assign NumerZeroE = XZeroE; assign X = PreShiftX; end - // count leading zeros for Subnorm FP and to normalize integer inputs - lzc #(`DIVb) lzcX (IFNormLenX, ell); - lzc #(`DIVb) lzcY (IFNormLenD, mE); - - // Normalization shift - assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, 1'b1}); // *** try to remove this +1 - assign DPreproc = 
IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); - - // append leading 1 (for normal inputs) - // shift square root to be in range [1/4, 1) - // Normalized numbers are shifted right by 1 if the exponent is odd - // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. - mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); - assign DivX = {3'b000, ~NumerZeroE, XPreproc}; - // Sqrt is initialized on step one as R(X-1), so depends on Radix if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; diff --git a/src/generic/mem/ram1p1rwe.sv b/src/generic/mem/ram1p1rwe.sv new file mode 100644 index 000000000..480ad3b45 --- /dev/null +++ b/src/generic/mem/ram1p1rwe.sv @@ -0,0 +1,105 @@ +/////////////////////////////////////////// +// 1 port sram. +// +// Written: avercruysse@hmc.edu (Modified from ram1p1rwbe, by ross1728@gmail.com) +// Created: 04 April 2023 +// +// Purpose: ram1p1rwbe, but without byte-enable. Used for icache data. +// Be careful using this module, since coverage is turned off for (ce & we). +// In read-only caches, we never get (we=1, ce=0), so this waiver is needed. +// +// Documentation: +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// WIDTH is number of bits in one "word" of the memory, DEPTH is number of such words + +`include "wally-config.vh" + +module ram1p1rwe #(parameter DEPTH=64, WIDTH=44) ( + input logic clk, + input logic ce, + input logic [$clog2(DEPTH)-1:0] addr, + input logic [WIDTH-1:0] din, + input logic we, + output logic [WIDTH-1:0] dout +); + + logic [WIDTH-1:0] RAM[DEPTH-1:0]; + + // *************************************************************************** + // TRUE SRAM macro + // *************************************************************************** + if ((`USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray + // 64 x 128-bit SRAM + ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), + .A(addr), .D(din), + .BWEB('0), .Q(dout)); + + end else if ((`USE_SRAM == 1) & (WIDTH == 44) & (DEPTH == 64)) begin // RV64 cache tag + // 64 x 44-bit SRAM + ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), + .A(addr), .D(din), + .BWEB('0), .Q(dout)); + + end else if ((`USE_SRAM == 1) & (WIDTH == 22) & (DEPTH == 64)) begin // RV32 cache tag + // 64 x 22-bit SRAM + ram1p1rwbe_64x22 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), + .A(addr), .D(din), + .BWEB('0), .Q(dout)); + + // *************************************************************************** + // READ first SRAM model + // *************************************************************************** + end else begin: ram + integer i; + + // Read + logic [$clog2(DEPTH)-1:0] addrd; + 
flopen #($clog2(DEPTH)) adrreg(clk, ce, addr, addrd); + assign dout = RAM[addrd]; + + /* // Read + always_ff @(posedge clk) + if(ce) dout <= #1 mem[addr]; */ + + // Write divided into part for bytes and part for extra msbs + // Questa sim version 2022.3_2 does not allow multiple drivers for RAM when using always_ff. + // Therefore these always blocks use the older always @(posedge clk) + if(WIDTH >= 8) + always @(posedge clk) + // coverage off + // ce only goes low when cachefsm is in READY state and Flush is asserted. + // for read-only caches, we only goes high in the STATE_WRITE_LINE cachefsm state. + // so we can never get we=1, ce=0 for I$. + if (ce & we) + // coverage on + for(i = 0; i < WIDTH/8; i++) + RAM[addr][i*8 +: 8] <= #1 din[i*8 +: 8]; + + if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 + always @(posedge clk) + // coverage off + // (see the above explanation) + if (ce & we) + // coverage on + RAM[addr][WIDTH-1:WIDTH-WIDTH%8] <= #1 din[WIDTH-1:WIDTH-WIDTH%8]; + end + +endmodule diff --git a/src/generic/mem/ram2p1r1wbe.sv b/src/generic/mem/ram2p1r1wbe.sv index 4c72095b6..51e2871b4 100644 --- a/src/generic/mem/ram2p1r1wbe.sv +++ b/src/generic/mem/ram2p1r1wbe.sv @@ -122,11 +122,14 @@ module ram2p1r1wbe #(parameter DEPTH=1024, WIDTH=68) ( if(ce1) rd1 <= #1 mem[ra1]; */ // Write divided into part for bytes and part for extra msbs + // coverage off + // when byte write enables are tied high, the last IF is always taken if(WIDTH >= 8) always @(posedge clk) if (ce2 & we2) for(i = 0; i < WIDTH/8; i++) if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8]; + // coverage on if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 always @(posedge clk) diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 11efacffa..224ff8b37 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -71,6 +71,7 @@ module hazard ( // Similarly, CSR writes and fences flush all subsequent instructions and refetch them in light of the new operating 
modes and cache/TLB contents // Branch misprediction is found in the Execute stage and must flush the next two instructions. // However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divde must still complete + // When a WFI is interrupted and causes a trap, it flushes the rest of the pipeline but not the W stage, because the WFI needs to commit assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); assign FlushMCause = TrapM | RetM | CSRWriteFenceM; diff --git a/src/ifu/bpred/RASPredictor.sv b/src/ifu/bpred/RASPredictor.sv index f1a39f75b..21cad922f 100644 --- a/src/ifu/bpred/RASPredictor.sv +++ b/src/ifu/bpred/RASPredictor.sv @@ -62,7 +62,7 @@ module RASPredictor #(parameter int StackSize = 16 )( assign PushE = CallE & ~StallM & ~FlushM; assign WrongPredReturnD = (BPReturnWrongD) & ~StallE & ~FlushE; - assign FlushedReturnDE = (~StallE & FlushE & ReturnD) | (~StallM & FlushM & ReturnE); // flushed return + assign FlushedReturnDE = (~StallE & FlushE & ReturnD) | (FlushM & ReturnE); // flushed return assign RepairD = WrongPredReturnD | FlushedReturnDE ; diff --git a/src/ifu/decompress.sv b/src/ifu/decompress.sv index b28a9e7be..f0882ddf7 100644 --- a/src/ifu/decompress.sv +++ b/src/ifu/decompress.sv @@ -126,7 +126,7 @@ module decompress ( InstrD = {7'b0000000, rs2p, rds1p, 3'b110, rds1p, 7'b0110011}; // c.or else // if (instr16[6:5] == 2'b11) InstrD = {7'b0000000, rs2p, rds1p, 3'b111, rds1p, 7'b0110011}; // c.and - else if (instr16[12:10] == 3'b111 & `XLEN > 32) + else if (`XLEN > 32) //if (instr16[12:10] == 3'b111) full truth table no need to check [12:10] if (instr16[6:5] == 2'b00) InstrD = {7'b0100000, rs2p, rds1p, 3'b000, rds1p, 7'b0111011}; // c.subw else if (instr16[6:5] == 2'b01) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index c1556daeb..75b2bc9e8 100644 --- 
a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -130,7 +130,7 @@ module ifu ( logic CacheableF; // PMA indicates instruction address is cacheable logic SelSpillNextF; // In a spill, stall pipeline and gate local stallF logic BusStall; // Bus interface busy with multicycle operation - logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation + logic IFUCacheBusStallF; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill // branch predictor signal logic [`XLEN-1:0] PC1NextF; // Branch predictor next PCF @@ -147,7 +147,7 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCSpillNextF, .PCSpillF, .SelSpillNextF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallF, .ITLBMissF, .PCSpillNextF, .PCSpillF, .SelSpillNextF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill assign PCSpillNextF = PCNextF; assign PCSpillF = PCF; @@ -288,8 +288,8 @@ module ifu ( assign InstrRawF = IROMInstrF; end - assign IFUCacheBusStallD = ICacheStallF | BusStall; - assign IFUStallF = IFUCacheBusStallD | SelSpillNextF; + assign IFUCacheBusStallF = ICacheStallF | BusStall; + assign IFUStallF = IFUCacheBusStallF | SelSpillNextF; assign GatedStallD = StallD & ~SelSpillNextF; flopenl #(32) AlignedInstrRawDFlop(clk, reset | FlushD, ~StallD, PostSpillInstrRawF, nop, InstrRawD); diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index db68fb9b4..54c0f2261 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -40,7 +40,7 @@ module spill #( input logic [`XLEN-1:2] PCPlus4F, // PCF + 4 input logic [`XLEN-1:0] PCNextF, // The next PCF input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed - input logic IFUCacheBusStallD, // I$ or bus are stalled. 
Transition to second fetch of spill after the first is fetched + input logic IFUCacheBusStallF, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic ITLBMissF, // ITLB miss, ignore memory request input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) output logic [`XLEN-1:0] PCSpillNextF, // The next PCF for one of the two memory addresses of the spill @@ -78,7 +78,7 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; - assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF)); + assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF)); always_ff @(posedge clk) if (reset | FlushD) CurrState <= #1 STATE_READY; @@ -88,14 +88,14 @@ module spill #( case (CurrState) STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; else NextState = STATE_READY; - STATE_SPILL: if(IFUCacheBusStallD | StallD) NextState = STATE_SPILL; + STATE_SPILL: if(StallD) NextState = STATE_SPILL; else NextState = STATE_READY; default: NextState = STATE_READY; endcase end assign SelSpillF = (CurrState == STATE_SPILL); - assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD); + assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallF); assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushD; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index f2e147f00..51efeccb2 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -62,7 +62,7 @@ module lsu ( output logic LoadPageFaultM, StoreAmoPageFaultM, // Page fault exceptions output logic LoadMisalignedFaultM, // Load 
address misaligned fault output logic LoadAccessFaultM, // Load access fault (PMA) - output logic HPTWInstrAccessFaultM, // HPTW generated access fault during instruction fetch + output logic HPTWInstrAccessFaultF, // HPTW generated access fault during instruction fetch // cpu hazard unit (trap) output logic StoreAmoMisalignedFaultM, // Store or AMO address misaligned fault output logic StoreAmoAccessFaultM, // Store or AMO access fault @@ -159,7 +159,7 @@ module lsu ( .IEUAdrExtM, .PTE, .IHWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IHAdrM, .HPTWStall, .SelHPTW, .IgnoreRequestTLB, .LSULoadAccessFaultM, .LSUStoreAmoAccessFaultM, - .LoadAccessFaultM, .StoreAmoAccessFaultM, .HPTWInstrAccessFaultM); + .LoadAccessFaultM, .StoreAmoAccessFaultM, .HPTWInstrAccessFaultF); end else begin // No HPTW, so signals are not multiplexed assign PreLSURWM = MemRWM; assign IHAdrM = IEUAdrExtM; @@ -170,7 +170,7 @@ module lsu ( assign LoadAccessFaultM = LSULoadAccessFaultM; assign StoreAmoAccessFaultM = LSUStoreAmoAccessFaultM; assign {HPTWStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; - assign HPTWInstrAccessFaultM = '0; + assign HPTWInstrAccessFaultF = '0; end // CommittedM indicates the cache, bus, or HPTW are busy with a multiple cycle operation. 
diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index b093167cf..f90e42ce9 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -64,7 +64,7 @@ module hptw ( output logic SelHPTW, output logic HPTWStall, input logic LSULoadAccessFaultM, LSUStoreAmoAccessFaultM, - output logic LoadAccessFaultM, StoreAmoAccessFaultM, HPTWInstrAccessFaultM + output logic LoadAccessFaultM, StoreAmoAccessFaultM, HPTWInstrAccessFaultF ); typedef enum logic [3:0] {L0_ADR, L0_RD, @@ -98,12 +98,25 @@ module hptw ( logic [1:0] HPTWRW; logic [2:0] HPTWSize; // 32 or 64 bit access statetype WalkerState, NextWalkerState, InitialWalkerState; + logic HPTWLoadAccessFault, HPTWStoreAmoAccessFault, HPTWInstrAccessFault; + logic HPTWLoadAccessFaultDelay, HPTWStoreAmoAccessFaultDelay, HPTWInstrAccessFaultDelay; + logic HPTWAccessFaultDelay; + logic TakeHPTWFault, TakeHPTWFaultDelay; // map hptw access faults onto either the original LSU load/store fault or instruction access fault assign LSUAccessFaultM = LSULoadAccessFaultM | LSUStoreAmoAccessFaultM; - assign LoadAccessFaultM = WalkerState == IDLE ? LSULoadAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0]; - assign StoreAmoAccessFaultM = WalkerState == IDLE ? LSUStoreAmoAccessFaultM : LSUAccessFaultM & DTLBWalk & MemRWM[0]; - assign HPTWInstrAccessFaultM = WalkerState == IDLE ? 1'b0: LSUAccessFaultM & ~DTLBWalk; + assign HPTWLoadAccessFault = LSUAccessFaultM & DTLBWalk & MemRWM[1] & ~MemRWM[0]; + assign HPTWStoreAmoAccessFault = LSUAccessFaultM & DTLBWalk & MemRWM[0]; + assign HPTWInstrAccessFault = LSUAccessFaultM & ~DTLBWalk; + + flopr #(4) HPTWAccesFaultReg(clk, reset, {TakeHPTWFault, HPTWLoadAccessFault, HPTWStoreAmoAccessFault, HPTWInstrAccessFault}, + {TakeHPTWFaultDelay, HPTWLoadAccessFaultDelay, HPTWStoreAmoAccessFaultDelay, HPTWInstrAccessFaultDelay}); + + assign TakeHPTWFault = WalkerState != IDLE; + + assign LoadAccessFaultM = TakeHPTWFaultDelay ? 
HPTWLoadAccessFaultDelay : LSULoadAccessFaultM; + assign StoreAmoAccessFaultM = TakeHPTWFaultDelay ? HPTWStoreAmoAccessFaultDelay : LSUStoreAmoAccessFaultM; + assign HPTWInstrAccessFaultF = TakeHPTWFaultDelay ? HPTWInstrAccessFaultDelay : 1'b0; // Extract bits from CSRs and inputs assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; @@ -247,22 +260,26 @@ module hptw ( flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset | FlushW, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb case (WalkerState) - IDLE: if (TLBMiss & ~DCacheStallM) NextWalkerState = InitialWalkerState; + IDLE: if (TLBMiss & ~DCacheStallM & ~HPTWAccessFaultDelay) NextWalkerState = InitialWalkerState; else NextWalkerState = IDLE; L3_ADR: NextWalkerState = L3_RD; // first access in SV48 L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; + else if(LSUAccessFaultM) NextWalkerState = IDLE; else NextWalkerState = L2_ADR; L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39 else NextWalkerState = LEAF; L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; + else if(LSUAccessFaultM) NextWalkerState = IDLE; else NextWalkerState = L1_ADR; L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32 else NextWalkerState = LEAF; L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; + else if(LSUAccessFaultM) NextWalkerState = IDLE; else NextWalkerState = L0_ADR; L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD; else NextWalkerState = LEAF; L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; + else if(LSUAccessFaultM) NextWalkerState = IDLE; else NextWalkerState = LEAF; LEAF: if (`SVADU_SUPPORTED & HPTWUpdateDA) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; @@ -271,9 +288,10 @@ module hptw ( default: NextWalkerState = IDLE; // should never be reached endcase // case (WalkerState) - assign IgnoreRequestTLB = WalkerState == IDLE & TLBMiss; + assign IgnoreRequestTLB = (WalkerState == IDLE & 
TLBMiss) | (LSUAccessFaultM); // RT : 05 April 2023 if hptw request has pmp/a fault suppress bus access. assign SelHPTW = WalkerState != IDLE; - assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); + assign HPTWAccessFaultDelay = HPTWLoadAccessFaultDelay | HPTWStoreAmoAccessFaultDelay | HPTWInstrAccessFaultDelay; + assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss & ~(HPTWAccessFaultDelay)); assign ITLBMissOrUpdateDAF = ITLBMissF | (`SVADU_SUPPORTED & InstrUpdateDAF); assign DTLBMissOrUpdateDAM = DTLBMissM | (`SVADU_SUPPORTED & DataUpdateDAM); diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index be63adb43..ee78e6ccb 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -33,7 +33,9 @@ `include "wally-config.vh" module csrc #(parameter MHPMCOUNTERBASE = 12'hB00, + MTIME = 12'hB01, // this is a memory-mapped register; no such CSR exists, and access should fault MHPMCOUNTERHBASE = 12'hB80, + MTIMEH = 12'hB81, // this is a memory-mapped register; no such CSR exists, and access should fault MHPMEVENTBASE = 12'h320, HPMCOUNTERBASE = 12'hC00, HPMCOUNTERHBASE = 12'hC80, @@ -152,8 +154,10 @@ module csrc #(parameter /* verilator lint_off WIDTH */ if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT; // TIME register is a shadow of the memory-mapped MTIME from the CLINT /* verilator lint_on WIDTH */ - else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS & CSRAdrM != MTIME) + CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) + CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; else begin CSRCReadValM = 0; IllegalCSRCAccessM = 1; // requested CSR doesn't exist @@ -164,10 +168,14 @@ module 
csrc #(parameter if (CSRAdrM == TIME) CSRCReadValM = MTIME_CLINT[31:0];// TIME register is a shadow of the memory-mapped MTIME from the CLINT else if (CSRAdrM == TIMEH) CSRCReadValM = MTIME_CLINT[63:32]; /* verilator lint_on WIDTH */ - else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; - else if (CSRAdrM >= MHPMCOUNTERHBASE & CSRAdrM < MHPMCOUNTERHBASE+`COUNTERS) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; - else if (CSRAdrM >= HPMCOUNTERHBASE & CSRAdrM < HPMCOUNTERHBASE+`COUNTERS) CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; + else if (CSRAdrM >= MHPMCOUNTERBASE & CSRAdrM < MHPMCOUNTERBASE+`COUNTERS & CSRAdrM != MTIME) + CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else if (CSRAdrM >= HPMCOUNTERBASE & CSRAdrM < HPMCOUNTERBASE+`COUNTERS) + CSRCReadValM = HPMCOUNTER_REGW[CounterNumM]; + else if (CSRAdrM >= MHPMCOUNTERHBASE & CSRAdrM < MHPMCOUNTERHBASE+`COUNTERS & CSRAdrM != MTIMEH) + CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; + else if (CSRAdrM >= HPMCOUNTERHBASE & CSRAdrM < HPMCOUNTERHBASE+`COUNTERS) + CSRCReadValM = HPMCOUNTERH_REGW[CounterNumM]; else begin CSRCReadValM = 0; IllegalCSRCAccessM = 1; // requested CSR doesn't exist diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index ca3d35717..7d34e22fc 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -65,7 +65,7 @@ module privileged ( // fault sources input logic InstrAccessFaultF, // instruction access fault input logic LoadAccessFaultM, StoreAmoAccessFaultM, // load or store access fault - input logic HPTWInstrAccessFaultM, // hardware page table access fault while fetching instruction PTE + input logic HPTWInstrAccessFaultF, // hardware page table access fault while fetching instruction PTE input logic InstrPageFaultF, // page faults input logic LoadPageFaultM, 
StoreAmoPageFaultM, // page faults input logic InstrMisalignedFaultM, // misaligned instruction fault @@ -112,6 +112,8 @@ module privileged ( logic DelegateM; // trap should be delegated logic InterruptM; // interrupt occuring logic ExceptionM; // Memory stage instruction caused a fault + logic HPTWInstrAccessFaultM; // Hardware page table access fault while fetching instruction PTE + // track the current privilege level privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, @@ -142,8 +144,8 @@ module privileged ( // pipeline early-arriving trap sources privpiperegs ppr(.clk, .reset, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, - .InstrPageFaultF, .InstrAccessFaultF, .IllegalIEUFPUInstrD, - .InstrPageFaultM, .InstrAccessFaultM, .IllegalIEUFPUInstrM); + .InstrPageFaultF, .InstrAccessFaultF, .HPTWInstrAccessFaultF, .IllegalIEUFPUInstrD, + .InstrPageFaultM, .InstrAccessFaultM, .HPTWInstrAccessFaultM, .IllegalIEUFPUInstrM); // trap logic trap trap(.reset, diff --git a/src/privileged/privpiperegs.sv b/src/privileged/privpiperegs.sv index c3d308c11..684b0ad73 100644 --- a/src/privileged/privpiperegs.sv +++ b/src/privileged/privpiperegs.sv @@ -33,24 +33,26 @@ module privpiperegs ( input logic StallD, StallE, StallM, input logic FlushD, FlushE, FlushM, input logic InstrPageFaultF, InstrAccessFaultF, // instruction faults + input logic HPTWInstrAccessFaultF, // hptw fault during instruction page fetch input logic IllegalIEUFPUInstrD, // illegal IEU instruction decoded output logic InstrPageFaultM, InstrAccessFaultM, // delayed instruction faults - output logic IllegalIEUFPUInstrM // delayed illegal IEU instruction + output logic IllegalIEUFPUInstrM, // delayed illegal IEU instruction + output logic HPTWInstrAccessFaultM // hptw fault during instruction page fetch ); // Delayed fault signals - logic InstrPageFaultD, InstrAccessFaultD; - logic InstrPageFaultE, InstrAccessFaultE; + logic InstrPageFaultD, InstrAccessFaultD, HPTWInstrAccessFaultD; 
+ logic InstrPageFaultE, InstrAccessFaultE, HPTWInstrAccessFaultE; logic IllegalIEUFPUInstrE; // pipeline fault signals - flopenrc #(2) faultregD(clk, reset, FlushD, ~StallD, - {InstrPageFaultF, InstrAccessFaultF}, - {InstrPageFaultD, InstrAccessFaultD}); - flopenrc #(3) faultregE(clk, reset, FlushE, ~StallE, - {IllegalIEUFPUInstrD, InstrPageFaultD, InstrAccessFaultD}, - {IllegalIEUFPUInstrE, InstrPageFaultE, InstrAccessFaultE}); - flopenrc #(3) faultregM(clk, reset, FlushM, ~StallM, - {IllegalIEUFPUInstrE, InstrPageFaultE, InstrAccessFaultE}, - {IllegalIEUFPUInstrM, InstrPageFaultM, InstrAccessFaultM}); -endmodule \ No newline at end of file + flopenrc #(3) faultregD(clk, reset, FlushD, ~StallD, + {InstrPageFaultF, InstrAccessFaultF, HPTWInstrAccessFaultF}, + {InstrPageFaultD, InstrAccessFaultD, HPTWInstrAccessFaultD}); + flopenrc #(4) faultregE(clk, reset, FlushE, ~StallE, + {IllegalIEUFPUInstrD, InstrPageFaultD, InstrAccessFaultD, HPTWInstrAccessFaultD}, + {IllegalIEUFPUInstrE, InstrPageFaultE, InstrAccessFaultE, HPTWInstrAccessFaultE}); + flopenrc #(4) faultregM(clk, reset, FlushM, ~StallM, + {IllegalIEUFPUInstrE, InstrPageFaultE, InstrAccessFaultE, HPTWInstrAccessFaultE}, + {IllegalIEUFPUInstrM, InstrPageFaultM, InstrAccessFaultM, HPTWInstrAccessFaultM}); +endmodule diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 81f1997af..ee5df5956 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -146,7 +146,7 @@ module wallypipelinedcore ( logic RASPredPCWrongM; logic IClassWrongM; logic [3:0] InstrClassM; - logic InstrAccessFaultF, HPTWInstrAccessFaultM; + logic InstrAccessFaultF, HPTWInstrAccessFaultF; logic [2:0] LSUHSIZE; logic [2:0] LSUHBURST; logic [1:0] LSUHTRANS; @@ -237,7 +237,7 @@ module wallypipelinedcore ( .StoreAmoPageFaultM, // connects to privilege .LoadMisalignedFaultM, // connects to privilege .LoadAccessFaultM, // connects to privilege - .HPTWInstrAccessFaultM, // connects to 
privilege + .HPTWInstrAccessFaultF, // connects to privilege .StoreAmoMisalignedFaultM, // connects to privilege .StoreAmoAccessFaultM, // connects to privilege .InstrUpdateDAF, @@ -289,7 +289,7 @@ module wallypipelinedcore ( .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, .IEUAdrM, .SetFflagsM, - .InstrAccessFaultF, .HPTWInstrAccessFaultM, .LoadAccessFaultM, .StoreAmoAccessFaultM, .SelHPTW, + .InstrAccessFaultF, .HPTWInstrAccessFaultF, .LoadAccessFaultM, .StoreAmoAccessFaultM, .SelHPTW, .PrivilegeModeW, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .STATUS_FS, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 2bc3622c1..281c69df1 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -480,7 +480,7 @@ logic [3:0] dummy; assign EndSample = DCacheFlushStart & ~DCacheFlushDone; flop #(1) BeginReg(clk, StartSampleFirst, BeginDelayed); - assign Begin = StartSampleFirst & ~ BeginDelayed; + assign Begin = StartSampleFirst & ~BeginDelayed; end @@ -555,12 +555,16 @@ logic [3:0] dummy; end - if (`ICACHE_SUPPORTED && `I_CACHE_ADDR_LOGGER) begin + if (`ICACHE_SUPPORTED && `I_CACHE_ADDR_LOGGER) begin : ICacheLogger int file; string LogFile; logic resetD, resetEdge; logic Enable; - assign Enable = ~dut.core.StallD & ~dut.core.FlushD & dut.core.ifu.bus.icache.CacheRWF[1] & ~reset; + // assign Enable = ~dut.core.StallD & ~dut.core.FlushD & dut.core.ifu.bus.icache.CacheRWF[1] & ~reset; + + // this version of Enable allows for accurate eviction logging. + // Likely needs further improvement. 
+ assign Enable = dut.core.ifu.bus.icache.icache.cachefsm.LRUWriteEn & ~reset; flop #(1) ResetDReg(clk, reset, resetD); assign resetEdge = ~reset & resetD; initial begin @@ -568,50 +572,64 @@ end file = $fopen(LogFile, "w"); $fwrite(file, "BEGIN %s\n", memfilename); end - string HitMissString; - assign HitMissString = dut.core.ifu.bus.icache.icache.CacheHit ? "H" : "M"; + string AccessTypeString, HitMissString; + assign HitMissString = dut.core.ifu.bus.icache.icache.CacheHit ? "H" : + dut.core.ifu.bus.icache.icache.vict.cacheLRU.AllValid ? "E" : "M"; + assign AccessTypeString = dut.core.ifu.InvalidateICacheM ? "I" : "R"; always @(posedge clk) begin if(resetEdge) $fwrite(file, "TRAIN\n"); if(Begin) $fwrite(file, "BEGIN %s\n", memfilename); if(Enable) begin // only log i cache reads - $fwrite(file, "%h R %s\n", dut.core.ifu.PCPF, HitMissString); + $fwrite(file, "%h %s %s\n", dut.core.ifu.PCPF, AccessTypeString, HitMissString); end if(EndSample) $fwrite(file, "END %s\n", memfilename); end end - if (`DCACHE_SUPPORTED && `D_CACHE_ADDR_LOGGER) begin + + if (`DCACHE_SUPPORTED && `D_CACHE_ADDR_LOGGER) begin : DCacheLogger int file; string LogFile; logic resetD, resetEdge; - string HitMissString; + logic Enabled; + string AccessTypeString, HitMissString; + flop #(1) ResetDReg(clk, reset, resetD); assign resetEdge = ~reset & resetD; - assign HitMissString = dut.core.lsu.bus.dcache.dcache.CacheHit ? "H" : "M"; + assign HitMissString = dut.core.lsu.bus.dcache.dcache.CacheHit ? "H" : + (!dut.core.lsu.bus.dcache.dcache.vict.cacheLRU.AllValid) ? "M" : + dut.core.lsu.bus.dcache.dcache.LineDirty ? "D" : "E"; + assign AccessTypeString = dut.core.lsu.bus.dcache.FlushDCache ? "F" : + dut.core.lsu.bus.dcache.CacheAtomicM[1] ? "A" : + dut.core.lsu.bus.dcache.CacheRWM == 2'b10 ? "R" : + dut.core.lsu.bus.dcache.CacheRWM == 2'b01 ? 
"W" : + "NULL"; + // assign Enabled = (dut.core.lsu.bus.dcache.dcache.cachefsm.CurrState == 0) & + // ~dut.core.lsu.bus.dcache.dcache.cachefsm.FlushStage & + // (AccessTypeString != "NULL"); + + // This version of enable allows for accurate eviction logging. + // Likely needs further improvement. + assign Enabled = dut.core.lsu.bus.dcache.dcache.cachefsm.LRUWriteEn & + ~dut.core.lsu.bus.dcache.dcache.cachefsm.FlushStage & + (AccessTypeString != "NULL"); + initial begin - LogFile = $psprintf("DCache.log"); + LogFile = $psprintf("DCache.log"); file = $fopen(LogFile, "w"); - $fwrite(file, "BEGIN %s\n", memfilename); - end + $fwrite(file, "BEGIN %s\n", memfilename); + end always @(posedge clk) begin if(resetEdge) $fwrite(file, "TRAIN\n"); if(Begin) $fwrite(file, "BEGIN %s\n", memfilename); - if(~dut.core.StallW & ~dut.core.FlushW & dut.core.InstrValidM) begin - if(dut.core.lsu.bus.dcache.CacheRWM == 2'b10) begin - $fwrite(file, "%h R %s\n", dut.core.lsu.PAdrM, HitMissString); - end else if (dut.core.lsu.bus.dcache.CacheRWM == 2'b01) begin - $fwrite(file, "%h W %s\n", dut.core.lsu.PAdrM, HitMissString); - end else if (dut.core.lsu.bus.dcache.CacheAtomicM[1] == 1'b1) begin // *** This may change - $fwrite(file, "%h A %s\n", dut.core.lsu.PAdrM, HitMissString); - end else if (dut.core.lsu.bus.dcache.FlushDCache) begin - $fwrite(file, "%h F %s\n", dut.core.lsu.PAdrM, HitMissString); - end + if(Enabled) begin + $fwrite(file, "%h %s %s\n", dut.core.lsu.PAdrM, AccessTypeString, HitMissString); end if(EndSample) $fwrite(file, "END %s\n", memfilename); end end - if (`BPRED_SUPPORTED) begin + if (`BPRED_SUPPORTED) begin : BranchLogger if (`BPRED_LOGGER) begin string direction; int file; @@ -706,7 +724,7 @@ module DCacheFlushFSM // these dirty bit selections would be needed if dirty is moved inside the tag array. 
//.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]), //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS+tagstart]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.RAM[index]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].wordram.CacheDataMem.RAM[index]), .index(index), .cacheWord(cacheWord), .CacheData(CacheData[way][index][cacheWord]), diff --git a/testbench/testbench_imperas.sv b/testbench/testbench_imperas.sv index 56ca763af..b6d22feaf 100644 --- a/testbench/testbench_imperas.sv +++ b/testbench/testbench_imperas.sv @@ -403,7 +403,7 @@ module DCacheFlushFSM // these dirty bit selections would be needed if dirty is moved inside the tag array. //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]), //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS+tagstart]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.RAM[index]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].wordram.CacheDataMem.RAM[index]), .index(index), .cacheWord(cacheWord), .CacheData(CacheData[way][index][cacheWord]), diff --git a/testbench/tests.vh b/testbench/tests.vh index 7f5ad4d0d..ca35ddd3b 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -49,7 +49,9 @@ string tvpaths[] = '{ "csrwrites", "priv", "ifu", - "fpu" + "fpu", + "lsu", + "vm64check" }; string coremark[] = '{ @@ -1052,6 +1054,28 @@ string imperas32f[] = '{ string arch64f[] = '{ `RISCVARCHTEST, + "rv64i_m/F/src/fdiv_b1-01.S", + "rv64i_m/F/src/fdiv_b20-01.S", + "rv64i_m/F/src/fdiv_b2-01.S", + "rv64i_m/F/src/fdiv_b21-01.S", + "rv64i_m/F/src/fdiv_b3-01.S", + "rv64i_m/F/src/fdiv_b4-01.S", + "rv64i_m/F/src/fdiv_b5-01.S", + "rv64i_m/F/src/fdiv_b6-01.S", + "rv64i_m/F/src/fdiv_b7-01.S", + 
"rv64i_m/F/src/fdiv_b8-01.S", + "rv64i_m/F/src/fdiv_b9-01.S", + "rv64i_m/F/src/fsqrt_b1-01.S", + "rv64i_m/F/src/fsqrt_b20-01.S", + "rv64i_m/F/src/fsqrt_b2-01.S", + "rv64i_m/F/src/fsqrt_b3-01.S", + "rv64i_m/F/src/fsqrt_b4-01.S", + "rv64i_m/F/src/fsqrt_b5-01.S", + "rv64i_m/F/src/fsqrt_b7-01.S", + "rv64i_m/F/src/fsqrt_b8-01.S", + "rv64i_m/F/src/fsqrt_b9-01.S", + + "rv64i_m/F/src/fadd_b10-01.S", "rv64i_m/F/src/fadd_b1-01.S", "rv64i_m/F/src/fadd_b11-01.S", @@ -1203,6 +1227,28 @@ string imperas32f[] = '{ string arch64d[] = '{ `RISCVARCHTEST, + // for speed + "rv64i_m/D/src/fdiv.d_b1-01.S", + "rv64i_m/D/src/fdiv.d_b20-01.S", + "rv64i_m/D/src/fdiv.d_b2-01.S", + "rv64i_m/D/src/fdiv.d_b21-01.S", + "rv64i_m/D/src/fdiv.d_b3-01.S", + "rv64i_m/D/src/fdiv.d_b4-01.S", + "rv64i_m/D/src/fdiv.d_b5-01.S", + "rv64i_m/D/src/fdiv.d_b6-01.S", + "rv64i_m/D/src/fdiv.d_b7-01.S", + "rv64i_m/D/src/fdiv.d_b8-01.S", + "rv64i_m/D/src/fdiv.d_b9-01.S", + "rv64i_m/D/src/fsqrt.d_b1-01.S", + "rv64i_m/D/src/fsqrt.d_b20-01.S", + "rv64i_m/D/src/fsqrt.d_b2-01.S", + "rv64i_m/D/src/fsqrt.d_b3-01.S", + "rv64i_m/D/src/fsqrt.d_b4-01.S", + "rv64i_m/D/src/fsqrt.d_b5-01.S", + "rv64i_m/D/src/fsqrt.d_b7-01.S", + "rv64i_m/D/src/fsqrt.d_b8-01.S", + "rv64i_m/D/src/fsqrt.d_b9-01.S", + "rv64i_m/D/src/fadd.d_b10-01.S", "rv64i_m/D/src/fadd.d_b1-01.S", "rv64i_m/D/src/fadd.d_b11-01.S", diff --git a/tests/coverage/WALLY-init-lib.h b/tests/coverage/WALLY-init-lib.h index f95ef285c..1dd43accf 100644 --- a/tests/coverage/WALLY-init-lib.h +++ b/tests/coverage/WALLY-init-lib.h @@ -36,6 +36,7 @@ rvtest_entry_point: csrw mtvec, t0 # Initialize MTVEC to trap_handler csrw mideleg, zero # Don't delegate interrupts csrw medeleg, zero # Don't delegate exceptions + li t0, 0x80 csrw mie, t0 # Enable machine timer interrupt la t0, topoftrapstack csrw mscratch, t0 # MSCRATCH holds trap stack pointer @@ -65,9 +66,8 @@ interrupt: # must be a timer interrupt j trap_return # clean up and return exception: - csrr t1, mepc # add 4 to MEPC to 
determine return Address - addi t1, t1, 4 - csrw mepc, t1 + li t0, 2 + csrr t1, mcause li t1, 8 # is it an ecall trap? andi t0, t0, 0xFC # if CAUSE = 8, 9, or 11 bne t0, t1, trap_return # ignore other exceptions @@ -86,6 +86,20 @@ changeprivilege: csrs mstatus, a0 # set mstatus.MPP with desired privilege trap_return: # return from trap handler + csrr t0, mepc # get address of instruction that caused exception + lh t0, 0(t0) # get instruction that caused exception + li t1, 3 + and t0, t0, t1 # mask off upper bits + beq t0, t1, instr32 # if lower 2 bits are 11, instruction is uncompresssed + li t0, 2 # increment PC by 2 for compressed instruction + j updateepc +instr32: + li t0, 4 +updateepc: + csrr t1, mepc # add 2 or 4 (from t0) to MEPC to determine return Address + add t1, t1, t0 + csrw mepc, t1 + ld t1, -8(tp) # restore t1 and t0 ld t0, 0(tp) csrrw tp, mscratch, tp # restore tp diff --git a/tests/coverage/csrwrites.S b/tests/coverage/csrwrites.S index 76e310e5f..41d211467 100644 --- a/tests/coverage/csrwrites.S +++ b/tests/coverage/csrwrites.S @@ -27,9 +27,11 @@ #include "WALLY-init-lib.h" main: + li t0, -5 + csrw stimecmp, t0 # initialize so ImperasDV agrees csrrw t0, stimecmp, t0 - csrrw t0, satp, t0 - csrrw t0, stvec, t0 - csrrw t0, sscratch, t0 + csrrw t0, satp, zero + csrrw t0, stvec, zero + csrrw t0, sscratch, zero j done diff --git a/tests/coverage/fpu.S b/tests/coverage/fpu.S index a349ac606..250100a68 100644 --- a/tests/coverage/fpu.S +++ b/tests/coverage/fpu.S @@ -67,6 +67,7 @@ main: # fcvt.w.q a0, ft0 # fcvt.q.d ft3, ft0 + # Completing branch coverage in fctrl.sv .word 0x38007553 // Testing the all False case for 119 - funct7 under, op = 101 0011 .word 0x40000053 // Line 145 All False Test case - illegal instruction? .word 0xd0400053 // Line 156 All False Test case - illegal instruction? @@ -74,6 +75,11 @@ main: .word 0xd2400053 // Line 168 All False Test case - illegal instruction? 
.word 0xc2400053 // Line 174 All False Test case - illegal instruction? + # Increasing conditional coverage in fctrl.sv + .word 0xc5000007 // Attempting to toggle (Op7 != 7) to 0 on line 97 in fctrl, not sure what instruction this works out to + .word 0xe0101053 // toggling (Rs2D == 0) to 0 on line 139 in fctrl. Illegal Intsr (like fclass but incorrect rs2) + .word 0xe0100053 // toggling (Rs2D == 0) to 0 on line 141 in fctrl. Illegal Intsr (like fmv but incorrect rs2) + # Test illegal instructions are detected .word 0x00000007 // illegal floating-point load (bad Funct3) .word 0x00000027 // illegal floating-point store (bad Funct3) diff --git a/tests/coverage/ieu.S b/tests/coverage/ieu.S index 3fd56686f..cb0dae877 100644 --- a/tests/coverage/ieu.S +++ b/tests/coverage/ieu.S @@ -42,6 +42,7 @@ main: clz t1, t0 # Test forwarding from store conditional + mv a0, sp lr.w t0, 0(a0) sc.w t0, a1, 0(a0) addi t0, t0, 1 diff --git a/tests/coverage/ifu.S b/tests/coverage/ifu.S index 9cde14ce2..ef863c617 100644 --- a/tests/coverage/ifu.S +++ b/tests/coverage/ifu.S @@ -35,20 +35,15 @@ main: //.hword 0x2000 // CL type compressed floating-point ld-->funct3,imm,rs1',imm,rd',op // binary version 0000 0000 0000 0000 0010 0000 0000 0000 mv s0, sp - c.fld fs0, 0(s0) + c.fld fs0, 0(s0) // Previously uncovered instructions + c.fsd fs0, 0(s0) + .hword 0x2002 // c.fldsp fs0, 0 + .hword 0xA002 // c.fsdsp fs0, 0 + .hword 0x9C41 // line 134 Illegal compressed instruction - c.fsd fs0, 0(s0) + //.hword 0x9C01 //# Illegal compressed instruction with op = 01, instr[15:10] = 100111, and 0's everywhere else - // c.fldsp fs0, 0 - .hword 0x2002 - - // c.fsdsp fs0, 0 - .hword 0xA002 - - //# Illegal compressed instruction with op = 01, instr[15:10] = 100111, and 0's everywhere else - //.hword 0x9C01 - - # Line Illegal compressed instruction - .hword 0x9C41 + + j done diff --git a/tests/coverage/lsu.S b/tests/coverage/lsu.S index 92d01b196..a5d8b1e51 100644 --- a/tests/coverage/lsu.S +++ 
b/tests/coverage/lsu.S @@ -1,34 +1,35 @@ -//lsu.S -// A set of tests meant to stress the LSU to increase coverage -// Manuel Alejandro Mendoza Manriquez mmendozamanriquez@g.hmc.edu -// Noah Limpert nlimpert@g.hmc.edu -// March 28 2023 +/////////////////////////////////////////// +// lsu.S +// +// Written: Kevin Box and Miles Cook kbox@hmc.edu mdcook@hmc.edu 26 March 2023 +// +// Purpose: Test coverage for lsu +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// - -// Test 1 -// Cache ways 1,2,3 do not have SelFlush = 0 -// To make SelFlush = 0 we must evict lines from ways 1,2,3 -// Will load 4 words with same tags, filling 4 ways of cache -// edit and store these words so that dirty bit is set ( is this necessary?) -// Will then load 4 more words, evicting the previous 4 words -// will make SelFlush = 0 for all 4 ways. 
- -// Load code to initialize stack, handle interrupts, terminate +// load code to initialize stack, handle interrupts, terminate #include "WALLY-init-lib.h" main: - li t0, 4096 //offset such that set will be same - li t1, 0 #t1 = i = 0 - li t2, 8 # n = 8 - add t3, sp, 0 // what our offset for loads and stores will be - -for1: bge t1, t2, done - add t3, t3, t0 - lw t4, 0(t3) - addi t4, t4, 1 - sw t4, 0(t3) - addi t1, t1, 1 - j for1 + sfence.vma x0, x0 // sfence.vma to assert TLBFlush + + j done diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index c4e9e302d..6ab5951b4 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -33,7 +33,8 @@ main: ecall # Test read to stimecmp fails when MCOUNTEREN_TM is not set - addi t0, zero, 0 + li t1, -3 + csrw stimecmp, t1 csrr t0, stimecmp @@ -56,6 +57,9 @@ main: ecall # machine mode again + # switch to supervisor mode + li a0, 1 + ecall # Test write to STVAL, SCAUSE, SEPC, and STIMECMP CSRs li t0, 0 @@ -70,6 +74,10 @@ main: # Switch to machine mode li a0, 3 ecall + + # Write to MCOUNTINHIBIT CSR + csrw mcountinhibit, t0 + # Testing the HPMCOUNTERM performance counter: writing # Base address is 2816 (MHPMCOUNTERBASE) # There are 32 HPMCOUNTER registers @@ -108,6 +116,33 @@ main: # Testing the HPMCOUNTERM performance counter: reading csrr t0, 2817 + + # Test writes to pmp address registers + csrw 951, t0 + csrw 952, t0 + csrw 953, t0 + csrw 954, t0 + csrw 955, t0 + csrw 956, t0 + csrw 957, t0 + csrw 958, t0 + + + # Testing writes to MTVAL, MCAUSE + li t0, 0 + csrw mtval, t0 + csrw mcause, t0 + + # set mstatus to enable floating point registers (mstatus.FS = 11) + bseti t1, zero, 13 + csrs mstatus, t1 + bseti t1, zero, 14 + csrs mstatus, t1 + + # Test writes to floating point CSRs + csrw frm, t0 + csrw fflags, t0 + j done diff --git a/tests/coverage/vm64check.S b/tests/coverage/vm64check.S new file mode 100644 index 000000000..55c4db959 --- /dev/null +++ b/tests/coverage/vm64check.S @@ -0,0 +1,173 @@ 
+/////////////////////////////////////////// +// vm64check.S +// +// Written: David_Harris@hmc.edu 7 April 2023 +// +// Purpose: vm64check coverage +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// Cover IMMU vm64check block by jumping to illegal virtual addresses +// Need a nonstandard trap handler to deal with returns from these jumps +// assign eq_46_38 = &(VAdr[46:38]) | ~|(VAdr[46:38]); + // assign eq_63_47 = &(VAdr[63:47]) | ~|(VAdr[63:47]); + // assign UpperBitsUnequal = SV39Mode ? 
~(eq_63_47 & eq_46_38) : ~eq_63_47; + +.section .text.init +.global rvtest_entry_point + +rvtest_entry_point: + la sp, topofstack # Initialize stack pointer (not used) + + # Set up interrupts + la t0, trap_handler + csrw mtvec, t0 # Initialize MTVEC to trap_handler + # set up PMP so user and supervisor mode can access full address space + csrw pmpcfg0, 0xF # configure PMP0 to TOR RWX + li t0, 0xFFFFFFFF + csrw pmpaddr0, t0 # configure PMP0 top of range to 0xFFFFFFFF to allow all 32-bit addresses + + # SATP in non-39 mode + csrw satp, zero + + // vm64check coverage +check1: + // check virtual addresses with bits 63:47 and/or 46:38 being equal or unequal + li t0, 0x00000001800F0000 # unimplemented memory with upper and lower all zero + la ra, check2 + jalr t0 + +check2: + li t0, 0xFFFFFFF1800F0000 # unimplemented memory with upper and lower all one + la ra, check3 + jalr t0 + +check3: + li t0, 0xFFF81001800F0000 # unimplemented memory with upper all one, lower mixed + la ra, check4 + jalr t0 + +check4: + li t0, 0x03001001800F0000 # unimplemented memory with upper mixed, lower mixed + la ra, check5 + jalr t0 + +check5: + li t0, 0x00001001800F0000 # unimplemented memory with upper all zero, lower mixed + la ra, check11 + jalr t0 + +check11: + # SATP in SV39 mode + li t0, 0x8000000000000000 + csrw satp, t0 + + // check virtual addresses with bits 63:47 and/or 46:38 being equal or unequal + li t0, 0x00000001800F0000 # unimplemented memory with upper and lower all zero + la ra, check12 + jalr t0 + +check12: + li t0, 0xFFFFFFF1800F0000 # unimplemented memory with upper and lower all one + la ra, check13 + jalr t0 + +check13: + li t0, 0xFFF81001800F0000 # unimplemented memory with upper all one, lower mixed + la ra, check14 + jalr t0 + +check14: + li t0, 0x03001001800F0000 # unimplemented memory with upper mixed, lower mixed + la ra, check15 + jalr t0 + +check15: + li t0, 0x00001001800F0000 # unimplemented memory with upper all zero, lower mixed + la ra, check16 + jalr t0 + 
+check16: + +write_tohost: + la t1, tohost + li t0, 1 # 1 for success, 3 for failure + sd t0, 0(t1) # send success code + +self_loop: + j self_loop # wait + +.align 4 # trap handlers must be aligned to multiple of 4 +trap_handler: + csrw mepc, ra # return to address in ra + mret + +.section .tohost +tohost: # write to HTIF + .dword 0 +fromhost: + .dword 0 + + +# Initialize stack with room for 512 bytes +.bss + .space 512 +topofstack: + + + + + + j done + + lw t1, 0(t0) + li t0, 0xFFFFFFFF80000000 + lw t1, 0(t0) + li t1, 0xFFF8000080000000 + lw t1, 0(t0) + li t1, 0x1000000080000000 + lw t1, 0(t0) + li t1, 0x0000010080000000 + lw t1, 0(t0) + li t0, 0x8000000000000000 + csrw satp, t0 # SV39 mode + li t0, 0x0000000080000000 + lw t1, 0(t0) + li t0, 0xFFFFFFFF80000000 + lw t1, 0(t0) + li t1, 0xFFF8000080000000 + lw t1, 0(t0) + li t1, 0x1000000080000000 + lw t1, 0(t0) + li t1, 0x0000010080000000 + lw t1, 0(t0) + li t0, 0x9000000000000000 + csrw satp, t0 # SV48 mode + li t0, 0x0000000080000000 + lw t1, 0(t0) + li t0, 0xFFFFFFFF80000000 + lw t1, 0(t0) + li t1, 0xFFF8000080000000 + lw t1, 0(t0) + li t1, 0x1000000080000000 + lw t1, 0(t0) + li t1, 0x0000010080000000 + lw t1, 0(t0) + li t0, 0x0000000000000000 + csrw satp, t0 # disable virtual memory \ No newline at end of file diff --git a/tests/custom/cacheSimTest/CacheSimTest.py b/tests/custom/cacheSimTest/CacheSimTest.py new file mode 100755 index 000000000..44cc00b06 --- /dev/null +++ b/tests/custom/cacheSimTest/CacheSimTest.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 + +########################################### +## CacheSimTest.py +## +## Written: lserafini@hmc.edu +## Created: 4 April 2023 +## Modified: 5 April 2023 +## +## Purpose: Confirm that the cache simulator behaves as expected. +## +## A component of the CORE-V-WALLY configurable RISC-V project. 
+## +## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +## +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +## +## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +## except in compliance with the License, or, at your option, the Apache License version 2.0. You +## may obtain a copy of the License at +## +## https://solderpad.org/licenses/SHL-2.1/ +## +## Unless required by applicable law or agreed to in writing, any work distributed under the +## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +## either express or implied. See the License for the specific language governing permissions +## and limitations under the License. +################################################################################################ + +import sys +import os + +sys.path.append(os.path.expanduser("~/cvw/bin")) +import CacheSim as cs + +if __name__ == "__main__": + cache = cs.Cache(16, 4, 16, 8) + # 0xABCD -> tag: AB, set: C, offset: D + + #address split checking + assert (cache.splitaddr(0x1234) == (0x12,0x3,0x4)) + assert (cache.splitaddr(0x2638) == (0x26,0x3,0x8)) + assert (cache.splitaddr(0xA3E6) == (0xA3,0xE,0x6)) + + #insert way 0 set C tag AB + assert (cache.cacheaccess(0xABCD) == 'M') + assert (cache.ways[0][0xC].tag == 0xAB) + assert (cache.cacheaccess(0xABCD) == 'H') + assert (cache.pLRU[0xC] == [1,1,0]) + + #make way 0 set C dirty + assert (cache.cacheaccess(0xABCD, True) == 'H') + + #insert way 1 set C tag AC + assert (cache.cacheaccess(0xACCD) == 'M') + assert (cache.ways[1][0xC].tag == 0xAC) + assert (cache.pLRU[0xC] == [1,0,0]) + + #insert way 2 set C tag AD + assert (cache.cacheaccess(0xADCD) == 'M') + assert (cache.ways[2][0xC].tag == 0xAD) + assert (cache.pLRU[0xC] == [0,0,1]) + + #insert way 3 set C tag AE + assert (cache.cacheaccess(0xAECD) == 'M') + assert (cache.ways[3][0xC].tag == 0xAE) + assert (cache.pLRU[0xC] == [0,0,0]) + + #misc hit and 
pLRU checking + assert (cache.cacheaccess(0xABCD) == 'H') + assert (cache.pLRU[0xC] == [1,1,0]) + assert (cache.cacheaccess(0xADCD) == 'H') + assert (cache.pLRU[0xC] == [0,1,1]) + + #evict way 1, now set C has tag AF + assert (cache.cacheaccess(0xAFCD) == 'E') + assert (cache.ways[1][0xC].tag == 0xAF) + assert (cache.pLRU[0xC] == [1,0,1]) + + #evict way 3, now set C has tag AC + assert (cache.cacheaccess(0xACCD) == 'E') + assert (cache.ways[3][0xC].tag == 0xAC) + assert (cache.pLRU[0xC] == [0,0,0]) + + #evict way 0, now set C has tag EA + #this line was dirty, so there was a wb + assert (cache.cacheaccess(0xEAC2) == 'D') + assert (cache.ways[0][0xC].tag == 0xEA) + assert (cache.pLRU[0xC] == [1,1,0]) \ No newline at end of file