Merge branch 'main' of github.com:openhwgroup/cvw

This commit is contained in:
Jacob Pease 2023-08-26 21:53:10 -05:00
commit 43202376a0
43 changed files with 2295 additions and 122 deletions

View File

@ -42,11 +42,12 @@ localparam ZICNTR_SUPPORTED = 1;
localparam ZIHPM_SUPPORTED = 1;
localparam COUNTERS = 12'd32;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
localparam ZICBOM_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 1;
localparam ZICBOM_SUPPORTED = 1;
localparam ZICBOZ_SUPPORTED = 1;
localparam ZICBOP_SUPPORTED = 1;
localparam SVPBMT_SUPPORTED = 1;
localparam SVNAPOT_SUPPORTED = 1;
localparam SVINVAL_SUPPORTED = 1;
// LSU microarchitectural Features

View File

@ -44,11 +44,12 @@ localparam COUNTERS = 12'd32;
localparam ZICNTR_SUPPORTED = 1;
localparam ZIHPM_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 0;
localparam ZICBOM_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 1;
localparam ZICBOM_SUPPORTED = 1;
localparam ZICBOZ_SUPPORTED = 1;
localparam ZICBOP_SUPPORTED = 1;
localparam SVPBMT_SUPPORTED = 1;
localparam SVNAPOT_SUPPORTED = 1;
localparam SVINVAL_SUPPORTED = 1;
// LSU microarchitectural Features

View File

@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SVNAPOT_SUPPORTED = 0;
localparam SVINVAL_SUPPORTED = 0;
// LSU microarchitectural Features

View File

@ -46,9 +46,10 @@ localparam ZIHPM_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 1;
localparam ZICBOM_SUPPORTED = 1;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 1;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SVNAPOT_SUPPORTED = 0;
localparam SVINVAL_SUPPORTED = 1;
// LSU microarchitectural Features

View File

@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SVNAPOT_SUPPORTED = 0;
localparam SVINVAL_SUPPORTED = 0;
// LSU microarchitectural Features

View File

@ -47,6 +47,7 @@ localparam ZICBOM_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SVNAPOT_SUPPORTED = 0;
localparam SVINVAL_SUPPORTED = 0;
// LSU microarchitectural Features

View File

@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SVNAPOT_SUPPORTED = 0;
localparam SVINVAL_SUPPORTED = 1;
// LSU microarchitectural Features

View File

@ -48,9 +48,10 @@ localparam ZIHPM_SUPPORTED = 1;
localparam ZFH_SUPPORTED = 0;
localparam SSTC_SUPPORTED = 1;
localparam ZICBOM_SUPPORTED = 1;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 1;
localparam ZICBOP_SUPPORTED = 1;
localparam SVPBMT_SUPPORTED = 1;
localparam SVNAPOT_SUPPORTED = 1;
localparam SVINVAL_SUPPORTED = 1;
// LSU microarchitectural Features

View File

@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0;
localparam ZICBOZ_SUPPORTED = 0;
localparam ZICBOP_SUPPORTED = 0;
localparam SVPBMT_SUPPORTED = 0;
localparam SVNAPOT_SUPPORTED = 0;
localparam SVINVAL_SUPPORTED = 0;
// LSU microarchitectural Features

View File

@ -101,10 +101,10 @@ localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
localparam DURLEN = ($clog2(FPDUR+1));
localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
localparam DIVBLEN = ($clog2(DIVb+1)-1);
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result
// largest length in IEU/FPU
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF));
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF)); // max(XLEN, NF)
localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($unsigned(FLEN)));
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));

View File

@ -25,6 +25,7 @@ parameter cvw_t P = '{
ZICBOZ_SUPPORTED : ZICBOZ_SUPPORTED,
ZICBOP_SUPPORTED : ZICBOP_SUPPORTED,
SVPBMT_SUPPORTED : SVPBMT_SUPPORTED,
SVNAPOT_SUPPORTED : SVNAPOT_SUPPORTED,
SVINVAL_SUPPORTED : SVINVAL_SUPPORTED,
BUS_SUPPORTED : BUS_SUPPORTED,
DCACHE_SUPPORTED : DCACHE_SUPPORTED,

5
src/cache/cache.sv vendored
View File

@ -101,6 +101,7 @@ module cache import cvw::*; #(parameter cvw_t P,
logic [LINELEN/8-1:0] LineByteMask;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
logic ZeroCacheLine;
logic CMOZeroHit;
logic [LINELEN-1:0] PreLineWriteData;
genvar index;
@ -119,7 +120,7 @@ module cache import cvw::*; #(parameter cvw_t P,
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
.clk, .reset, .CacheEn, .CMOp, .CacheSet, .PAdr, .LineWriteData, .LineByteMask,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .VictimWay,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
// Select victim way for associative caches
@ -225,7 +226,7 @@ module cache import cvw::*; #(parameter cvw_t P,
.FlushStage, .CacheRW, .CacheAtomic, .Stall,
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
.CacheMiss, .CacheAccess, .SelAdr,
.ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .SelFlush,
.ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .SelFlush,
.FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
.FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
.InvalidateCache, .CMOp, .CacheEn, .LRUWriteEn);

40
src/cache/cachefsm.sv vendored
View File

@ -60,6 +60,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
output logic SetDirty, // Set the dirty bit in the selected way and set
output logic ClearDirty, // Clear the dirty bit in the selected way and set
output logic ZeroCacheLine, // Write zeros to all bytes of cacheline
output logic CMOZeroHit, // CMOZ hit
output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback
output logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for both data and tag
output logic LRUWriteEn, // Update the LRU state
@ -75,7 +76,10 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
logic AnyUpdateHit, AnyHit;
logic AnyMiss;
logic FlushFlag;
logic CMOWritebackHit;
logic CMOZeroNoEviction;
logic CMOZeroEviction;
typedef enum logic [3:0]{STATE_READY, // hit states
// miss states
STATE_FETCH,
@ -93,8 +97,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
statetype CurrState, NextState;
assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss
assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1
assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1
assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit
assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit;
assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now
assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line
assign FlushFlag = FlushAdrFlag & FlushWayFlag;
// outputs for the performance counters.
@ -117,8 +125,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck
else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH;
else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement
else if(AnyMiss) /* & LineDirty */ NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement
else if((CMOp[1] | CMOp[2]) & CacheHit) NextState = STATE_CMO_WRITEBACK;
else if(AnyMiss | CMOZeroEviction) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement
else if(CMOWritebackHit) NextState = STATE_CMO_WRITEBACK;
else NextState = STATE_READY;
STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE;
else if(CacheBusAck) NextState = STATE_READY;
@ -127,7 +135,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD;
else NextState = STATE_READY;
// exclusion-tag-start: icache case
STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH;
STATE_WRITEBACK: if(CacheBusAck & ~CMOp[3]) NextState = STATE_FETCH;
else if(CacheBusAck) NextState = STATE_CMO_DONE;
else NextState = STATE_WRITEBACK;
// eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack.
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK;
@ -139,6 +148,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
STATE_CMO_WRITEBACK: if(CacheBusAck & (CMOp[1] | CMOp[2])) NextState = STATE_CMO_DONE;
else NextState = STATE_CMO_WRITEBACK;
STATE_CMO_DONE: if(Stall) NextState = STATE_CMO_DONE;
else NextState = STATE_READY;
// exclusion-tag-end: icache case
default: NextState = STATE_READY;
endcase
@ -146,7 +157,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
// com back to CPU
assign CacheCommitted = (CurrState != STATE_READY) & ~(READ_ONLY_CACHE & (CurrState == STATE_READ_HOLD | CurrState == STATE_CMO_DONE));
assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | ((CMOp[1] | CMOp[2]) & CacheHit))) | // exclusion-tag: icache StallStates
assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | CMOWritebackHit | CMOZeroEviction)) | // exclusion-tag: icache StallStates
(CurrState == STATE_FETCH) |
(CurrState == STATE_WRITEBACK) |
(CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write.
@ -154,21 +165,26 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
(CurrState == STATE_FLUSH_WRITEBACK) |
(CurrState == STATE_CMO_WRITEBACK);
// write enables internal to cache
assign CMOZeroHit = CurrState == STATE_READY & CMOp[3] & CacheHit ;
assign SetValid = CurrState == STATE_WRITE_LINE |
(CurrState == STATE_READY & CMOp[3]); // *** RT: NOT completely right has to be a hit
(CurrState == STATE_READY & CMOZeroNoEviction) |
(P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]);
assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0] & CacheHit) |
(CurrState == STATE_CMO_WRITEBACK & CMOp[2] & CacheBusAck));
// coverage off -item e 1 -fecexprrow 8
assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) |
assign LRUWriteEn = (CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) |
(P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck) |
(CurrState == STATE_WRITE_LINE) & ~FlushStage;
// exclusion-tag-start: icache flushdirtycontrols
assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOp[3])) | // exclusion-tag: icache SetDirty *** NOT completely right has to be a hit for CMOp[3]
(CurrState == STATE_WRITE_LINE & (CacheRW[0]));
assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty
(CurrState == STATE_WRITE_LINE & (CacheRW[0])) |
(P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck));
assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty
(CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set.
// Flush and eviction controls
(P.ZICBOM_SUPPORTED & CurrState == STATE_CMO_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck);
assign ZeroCacheLine = CurrState == STATE_READY & CMOp[3]; // *** RT: NOT completely right
assign ZeroCacheLine = P.ZICBOZ_SUPPORTED & ((CurrState == STATE_READY & CMOZeroNoEviction) |
(CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)));
assign SelWriteback = (CurrState == STATE_WRITEBACK & ~CacheBusAck) |
(CurrState == STATE_READY & AnyMiss & LineDirty);
assign SelCMOWriteback = CurrState == STATE_CMO_WRITEBACK;
@ -188,7 +204,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
// Bus interface controls
assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses
(CurrState == STATE_FETCH & ~CacheBusAck) |
(CurrState == STATE_WRITEBACK & CacheBusAck);
(CurrState == STATE_WRITEBACK & CacheBusAck & ~CMOp[3]);
assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | // exclusion-tag: icache CacheBusW
(CurrState == STATE_WRITEBACK & ~CacheBusAck) |
(CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) |

11
src/cache/cacheway.sv vendored
View File

@ -41,10 +41,10 @@ module cacheway import cvw::*; #(parameter cvw_t P,
input logic SetValid, // Set the valid bit in the selected way and set
input logic ClearValid, // Clear the valid bit in the selected way and set
input logic SetDirty, // Set the dirty bit in the selected way and set
input logic ZeroCacheLine, // Write zeros to all bytes of a cache line
input logic CMOZeroHit, // Write zeros to all bytes of a cache line
input logic ClearDirty, // Clear the dirty bit in the selected way and set
input logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback
input logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for both data and tag
input logic SelCMOWriteback,// Overrides cached tag check to select a specific way and set for writeback for both data and tag
input logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr
input logic VictimWay, // LRU selected this way as victim to evict
input logic FlushWay, // This way is selected for flush and possible writeback if dirty
@ -81,7 +81,9 @@ module cacheway import cvw::*; #(parameter cvw_t P,
logic SelNotHit2;
if (P.ZICBOZ_SUPPORTED) begin : cbologic
assign SelNotHit2 = SetValid & ~(ZeroCacheLine & HitWay);
assign SelNotHit2 = SetValid & ~CMOZeroHit;
//assign SelNotHit2 = SetValid;
end else begin : cbologic
assign SelNotHit2 = SetValid;
end
@ -96,7 +98,8 @@ module cacheway import cvw::*; #(parameter cvw_t P,
// nonzero ways will never see SelFlush=0 while FlushWay=1 since FlushWay only advances on a subset of SelFlush assertion cases.
assign FlushWayEn = FlushWay & SelFlush;
// *** RT: This is slopy. I should refactor to have the fsm issue two types of writeback commands
assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback;
assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; // *** this is not correct
//assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback;
end else begin:flushlogic // no flush operation for read-only caches.
assign SelTag = VictimWay;

View File

@ -60,6 +60,7 @@ typedef struct packed {
logic ZICBOZ_SUPPORTED;
logic ZICBOP_SUPPORTED;
logic SVPBMT_SUPPORTED;
logic SVNAPOT_SUPPORTED;
logic SVINVAL_SUPPORTED;
// Microarchitectural Features

View File

@ -375,12 +375,14 @@ module controller import cvw::*; #(parameter cvw_t P) (
LSUPrefetchD = 1'b0;
ImmSrcD = PreImmSrcD;
if (P.ZICBOP_SUPPORTED & (InstrD[14:0] == 15'b110_00000_0010011)) begin // ori with destiation x0 is hint for Prefetch
case (Rs2D) // which type of prefectch? Note: prefetch.r and .w are handled the same in Wally
/* verilator lint_off CASEINCOMPLETE */
case (Rs2D) // which type of prefectch? Note: prefetch.r and .w are handled the same in Wally
5'b00000: IFUPrefetchD = 1'b1; // prefetch.i
5'b00001: LSUPrefetchD = 1'b1; // prefetch.r
5'b00011: LSUPrefetchD = 1'b1; // prefetch.w
// default: not a prefetch hint
endcase
/* verilator lint_on CASEINCOMPLETE */
if (IFUPrefetchD | LSUPrefetchD) ImmSrcD = 3'b001; // use S-type immediate format for prefetches
end
end

View File

@ -85,6 +85,7 @@ module ifu import cvw::*; #(parameter cvw_t P) (
input logic STATUS_SUM, // Status CSR: Supervisor access to user memory
input logic STATUS_MPRV, // Status CSR: modify machine privilege
input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level
input logic ENVCFG_PBMTE, // Page-based memory types enabled
input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries
output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits
@ -170,7 +171,7 @@ module ifu import cvw::*; #(parameter cvw_t P) (
assign TLBFlush = sfencevmaM & ~StallMQ;
mmu #(.P(P), .TLB_ENTRIES(P.ITLB_ENTRIES), .IMMU(1))
immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE,
.PrivilegeModeW, .DisableTranslation(1'b0),
.VAdr(PCFExt),
.Size(2'b10),

View File

@ -80,6 +80,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
input logic [P.XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege
input logic [1:0] STATUS_MPP, // Machine previous privilege mode
input logic ENVCFG_PBMTE, // Page-based memory types enabled
input logic [P.XLEN-1:0] PCSpillF, // Fetch PC
input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits
@ -189,7 +190,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
assign DisableTranslation = SelHPTW | FlushDCacheM;
assign WriteAccessM = PreLSURWM[0] | (|CMOpM);
mmu #(.P(P), .TLB_ENTRIES(P.DTLB_ENTRIES), .IMMU(0))
dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE,
.PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]),
.PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(sfencevmaM),
.PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM),

View File

@ -34,6 +34,7 @@ module mmu import cvw::*; #(parameter cvw_t P,
input logic STATUS_SUM, // Status CSR: Supervisor access to user memory
input logic STATUS_MPRV, // Status CSR: modify machine privilege
input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level
input logic ENVCFG_PBMTE, // Page-based memory types enabled
input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor
input logic DisableTranslation, // virtual address translation disabled during D$ flush and HPTW walk that use physical addresses
input logic [P.XLEN+1:0] VAdr, // virtual/physical address from IEU or physical address from HPTW
@ -70,6 +71,7 @@ module mmu import cvw::*; #(parameter cvw_t P,
logic TLBHit; // Hit in TLB
logic TLBPageFault; // Page fault from TLB
logic ReadNoAmoAccessM; // Read that is not part of atomic operation causes Load faults. Otherwise StoreAmo faults
logic [1:0] PBMemoryType; // PBMT field of PTE during TLB hit, or 00 otherwise
// only instantiate TLB if Virtual Memory is supported
if (P.VIRTMEM_SUPPORTED) begin:tlb
@ -80,16 +82,17 @@ module mmu import cvw::*; #(parameter cvw_t P,
.clk, .reset,
.SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]),
.SATP_ASID(SATP_REGW[P.ASID_BASE+P.ASID_BITS-1:P.ASID_BASE]),
.VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
.VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE,
.PrivilegeModeW, .ReadAccess, .WriteAccess,
.DisableTranslation, .PTE, .PageTypeWriteVal,
.TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit,
.Translate, .TLBPageFault, .UpdateDA);
.Translate, .TLBPageFault, .UpdateDA, .PBMemoryType);
end else begin:tlb // just pass address through as physical
assign Translate = 0;
assign TLBMiss = 0;
assign TLBHit = 1; // *** is this necessary
assign TLBPageFault = 0;
assign PBMemoryType = 2'b00;
end
// If translation is occuring, select translated physical address from TLB
@ -103,8 +106,8 @@ module mmu import cvw::*; #(parameter cvw_t P,
///////////////////////////////////////////
pmachecker #(P) pmachecker(.PhysicalAddress, .Size,
.AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM,
.Cacheable, .Idempotent, .SelTIM,
.AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PBMemoryType,
.Cacheable, .Idempotent, .SelTIM,
.PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM);
if (P.PMP_ENTRIES > 0) begin : pmp

View File

@ -35,6 +35,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) (
input logic ExecuteAccessF, // Execute access
input logic WriteAccessM, // Write access
input logic ReadAccessM, // Read access
input logic [1:0] PBMemoryType, // PBMT field of PTE during TLB hit, or 00 otherwise
output logic Cacheable, Idempotent, SelTIM,
output logic PMAInstrAccessFaultF,
output logic PMALoadAccessFaultM,
@ -45,6 +46,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) (
logic AccessRW, AccessRWX, AccessRX;
logic [10:0] SelRegions;
logic AtomicAllowed;
logic CacheableRegion, IdempotentRegion;
// Determine what type of access is being made
assign AccessRW = ReadAccessM | WriteAccessM;
@ -54,11 +56,15 @@ module pmachecker import cvw::*; #(parameter cvw_t P) (
// Determine which region of physical memory (if any) is being accessed
adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions);
// Only non-core RAM/ROM memory regions are cacheable
assign Cacheable = SelRegions[8] | SelRegions[7] | SelRegions[6]; // exclusion-tag: unused-cachable
// Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable
assign CacheableRegion = SelRegions[8] | SelRegions[7] | SelRegions[6];
assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; // exclusion-tag: unused-cachable
// Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly
// I/O is nonidempotent.
assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6]; // exclusion-tag: unused-idempotent
// I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent
assign IdempotentRegion = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6];
assign Idempotent = (PBMemoryType == 2'b00) ? IdempotentRegion : (PBMemoryType == 2'b01); // exclusion-tag: unused-idempotent
// Atomic operations are only allowed on RAM
assign AtomicAllowed = SelRegions[10] | SelRegions[8] | SelRegions[6]; // exclusion-tag: unused-atomic
// Check if tightly integrated memories are selected

View File

@ -57,12 +57,13 @@ module tlb import cvw::*; #(parameter cvw_t P,
input logic [P.ASID_BITS-1:0] SATP_ASID,
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
input logic [1:0] STATUS_MPP,
input logic ENVCFG_PBMTE, // Page-based memory types enabled
input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor
input logic ReadAccess,
input logic WriteAccess,
input logic DisableTranslation,
input logic [P.XLEN-1:0] VAdr, // address input before translation (could be physical or virtual)
input logic [P.XLEN-1:0] PTE,
input logic [P.XLEN-1:0] PTE, // page table entry to write
input logic [1:0] PageTypeWriteVal,
input logic TLBWrite,
input logic TLBFlush,
@ -71,20 +72,23 @@ module tlb import cvw::*; #(parameter cvw_t P,
output logic TLBHit,
output logic Translate,
output logic TLBPageFault,
output logic UpdateDA
output logic UpdateDA,
output logic [1:0] PBMemoryType // PBMT field of PTE during TLB hit, or 00 otherwise
);
logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex
logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs, PTE_NAPOTs; // used as the one-hot encoding of WriteIndex
// Sections of the virtual and physical addresses
logic [P.VPN_BITS-1:0] VPN;
logic [P.PPN_BITS-1:0] PPN;
// Sections of the page table entry
logic [7:0] PTEAccessBits;
logic [10:0] PTEAccessBits;
logic [1:0] HitPageType;
logic CAMHit;
logic SV39Mode;
logic Misaligned;
logic BadPTEWrite; // trying to write malformed PTE
logic MegapageMisaligned;
logic PTE_N; // NAPOT page table entry
if(P.XLEN == 32) begin
assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0
@ -101,20 +105,28 @@ module tlb import cvw::*; #(parameter cvw_t P,
assign VPN = VAdr[P.VPN_BITS+11:12];
tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
// check if reserved, N, or PBMT bits are malformed when PTE is written in RV64
assign BadPTEWrite = (P.XLEN == 64) & TLBWrite & (
PTE[P.XLEN-1] & ~P.SVNAPOT_SUPPORTED | // N must be 0 if SVNAPOT is not supported
PTE[P.XLEN-2:P.XLEN-3] != 0 & ~P.SVPBMT_SUPPORTED | // PBMT must be 0 if SVBPMT is not supported
PTE[P.XLEN-2:P.XLEN-3] == 3 | // PBMT of 3 is reserved and never legal
PTE[P.XLEN-4:P.XLEN-10] != 0 ); // Reserved bits must be 0
tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE,
.PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush,
.PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault,
.UpdateDA, .SV39Mode, .Translate);
.PTEAccessBits, .CAMHit, .Misaligned, .BadPTEWrite,
.TLBMiss, .TLBHit, .TLBPageFault,
.UpdateDA, .SV39Mode, .Translate, .PTE_N, .PBMemoryType);
tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables);
tlbcam #(P, TLB_ENTRIES, P.VPN_BITS + P.ASID_BITS, P.VPN_SEGMENT_BITS)
tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs,
tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, .PTE_NAPOTs,
.SATP_ASID, .Matches, .HitPageType, .CAMHit);
tlbram #(P, TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs);
tlbram #(P, TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs, .PTE_NAPOTs);
// Replace segments of the virtual page number with segments of the physical
// page number. For 4 KB pages, the entire virtual page number is replaced.
// For superpages, some segments are considered offsets into a larger page.
tlbmixer #(P) Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .TLBPAdr);
tlbmixer #(P) Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .PTE_N, .TLBPAdr);
endmodule

View File

@ -38,7 +38,8 @@ module tlbcam import cvw::*; #(parameter cvw_t P,
input logic TLBFlush,
input logic [TLB_ENTRIES-1:0] WriteEnables,
input logic [TLB_ENTRIES-1:0] PTE_Gs,
input logic [P.ASID_BITS-1:0] SATP_ASID,
input logic [TLB_ENTRIES-1:0] PTE_NAPOTs, // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000)
input logic [P.ASID_BITS-1:0] SATP_ASID,
output logic [TLB_ENTRIES-1:0] Matches,
output logic [1:0] HitPageType,
output logic CAMHit
@ -53,7 +54,7 @@ module tlbcam import cvw::*; #(parameter cvw_t P,
// page number segments.
tlbcamline #(P, KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0](
.clk, .reset, .VPN, .SATP_ASID, .SV39Mode, .PTE_G(PTE_Gs), .PageTypeWriteVal, .TLBFlush,
.clk, .reset, .VPN, .SATP_ASID, .SV39Mode, .PTE_G(PTE_Gs), .PTE_NAPOT(PTE_NAPOTs), .PageTypeWriteVal, .TLBFlush,
.WriteEnable(WriteEnables), .PageTypeRead, .Match(Matches));
assign CAMHit = |Matches & ~TLBFlush;
or_rows #(TLB_ENTRIES,2) PageTypeOr(PageTypeRead, HitPageType);

View File

@ -37,6 +37,7 @@ module tlbcamline import cvw::*; #(parameter cvw_t P,
input logic SV39Mode,
input logic WriteEnable, // Write a new entry to this line
input logic PTE_G,
input logic PTE_NAPOT, // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000)
input logic [1:0] PageTypeWriteVal,
input logic TLBFlush, // Flush this line (set valid to 0)
output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one?
@ -76,7 +77,7 @@ module tlbcamline import cvw::*; #(parameter cvw_t P,
end else begin: match
logic [SEGMENT_BITS-1:0] Key2, Key3, Query2, Query3;
logic Match2, Match3;
logic Match2, Match3, MatchNAPOT;
assign {Query3, Query2, Query1, Query0} = VPN;
assign {Key_ASID, Key3, Key2, Key1, Key0} = Key;
@ -84,7 +85,9 @@ module tlbcamline import cvw::*; #(parameter cvw_t P,
// Calculate the actual match value based on the input vpn and the page type.
// For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1]
// should automatically match.
assign Match0 = (Query0 == Key0) | (PageType > 2'd0); // least signifcant section
// In Svnapot, if N bit is set and bottom 4 bits of PPN = 1000, then these bits don't need to match
assign MatchNAPOT = P.SVNAPOT_SUPPORTED & PTE_NAPOT & (Query0[SEGMENT_BITS-1:4] == Key0[SEGMENT_BITS-1:4]);
assign Match0 = (Query0 == Key0) | (PageType > 2'd0) | MatchNAPOT; // least significant section
assign Match1 = (Query1 == Key1) | (PageType > 2'd1);
assign Match2 = (Query2 == Key2) | (PageType > 2'd2);
assign Match3 = (Query3 == Key3) | SV39Mode; // this should always match in sv39 because they aren't used

View File

@ -29,30 +29,37 @@
module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) (
input logic [P.SVMODE_BITS-1:0] SATP_MODE,
input logic [P.XLEN-1:0] VAdr,
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
input logic [1:0] STATUS_MPP,
input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor
input logic ReadAccess, WriteAccess,
input logic DisableTranslation,
input logic TLBFlush, // Invalidate all TLB entries
input logic [7:0] PTEAccessBits,
input logic CAMHit,
input logic Misaligned,
output logic TLBMiss,
output logic TLBHit,
output logic TLBPageFault,
output logic UpdateDA,
output logic SV39Mode,
output logic Translate
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
input logic [1:0] STATUS_MPP,
input logic ENVCFG_PBMTE, // Page-based memory types enabled
input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor
input logic ReadAccess, WriteAccess,
input logic DisableTranslation,
input logic TLBFlush, // Invalidate all TLB entries
input logic [10:0] PTEAccessBits,
input logic CAMHit,
input logic Misaligned,
input logic BadPTEWrite, // trying to write malformed PTE
output logic TLBMiss,
output logic TLBHit,
output logic TLBPageFault,
output logic UpdateDA,
output logic SV39Mode,
output logic Translate,
output logic PTE_N, // NAPOT page table entry
output logic [1:0] PBMemoryType // PBMT field of PTE during TLB hit, or 00 otherwise
);
// Sections of the page table entry
logic [1:0] EffectivePrivilegeMode;
logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R, PTE_V; // Useful PTE Control Bits
logic [1:0] PTE_PBMT;
logic UpperBitsUnequal;
logic TLBAccess;
logic ImproperPrivilege;
logic BadPBMT;
logic CausePageFault;
// Grab the sv mode from SATP and determine whether translation should occur
assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1
@ -65,8 +72,16 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) (
vm64check #(P) vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal);
// unswizzle useful PTE bits
assign PTE_N = PTEAccessBits[10];
assign PTE_PBMT = PTEAccessBits[9:8];
assign {PTE_D, PTE_A} = PTEAccessBits[7:6];
assign {PTE_U, PTE_X, PTE_W, PTE_R, PTE_V} = PTEAccessBits[4:0];
// Page fault if PBMT is nonzero when SVPBMT is not supported and enabled
assign BadPBMT = PTE_PBMT != 0 & ~(P.SVPBMT_SUPPORTED & ENVCFG_PBMTE);
// Send PMA a 2-bit MemoryType that is PBMT during leaf page table accesses and 0 otherwise
assign PBMemoryType = PTE_PBMT & {2{Translate & TLBHit & P.SVPBMT_SUPPORTED}};
// Check whether the access is allowed, page faulting if not.
if (ITLB == 1) begin:itlb // Instruction TLB fault checking
@ -74,14 +89,11 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) (
// only execute non-user mode pages.
assign ImproperPrivilege = ((EffectivePrivilegeMode == P.U_MODE) & ~PTE_U) |
((EffectivePrivilegeMode == P.S_MODE) & PTE_U);
if(P.SVADU_SUPPORTED) begin : hptwwrites
assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault;
assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V);
end else begin
// fault for software handling if access bit is off
assign UpdateDA = ~PTE_A;
assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V);
end
assign CausePageFault = ImproperPrivilege | ~PTE_X | UpperBitsUnequal | BadPTEWrite | BadPBMT | Misaligned | ~PTE_V | (~PTE_A & P.SVADU_SUPPORTED);
assign TLBPageFault = Translate & TLBHit & CausePageFault;
// Determine wheter to update DA bits
if(P.SVADU_SUPPORTED) assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault;
else assign UpdateDA = ~PTE_A;
end else begin:dtlb // Data TLB fault checking
logic InvalidRead, InvalidWrite;

View File

@ -30,18 +30,19 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module tlbmixer import cvw::*; #(parameter cvw_t P) (
input logic [P.VPN_BITS-1:0] VPN,
input logic [P.PPN_BITS-1:0] PPN,
input logic [1:0] HitPageType,
input logic [11:0] Offset,
input logic TLBHit,
output logic [P.PA_BITS-1:0] TLBPAdr
input logic [P.VPN_BITS-1:0] VPN,
input logic [P.PPN_BITS-1:0] PPN,
input logic [1:0] HitPageType,
input logic [11:0] Offset,
input logic TLBHit,
input logic PTE_N, // NAPOT page table entry
output logic [P.PA_BITS-1:0] TLBPAdr
);
localparam EXTRA_BITS = P.PPN_BITS - P.VPN_BITS;
logic [P.PPN_BITS-1:0] ZeroExtendedVPN;
logic [P.PPN_BITS-1:0] PageNumberMask;
logic [P.PPN_BITS-1:0] PPNMixed;
logic [P.PPN_BITS-1:0] PPNMixed, PPNMixed2;
// produce PageNumberMask with 1s where virtual page number bits should be untranslaetd for superpages
if (P.XLEN == 32)
@ -57,11 +58,45 @@ module tlbmixer import cvw::*; #(parameter cvw_t P) (
// merge low segments of VPN with high segments of PPN decided by the pagetype.
assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN.
assign PPNMixed = PPN | ZeroExtendedVPN & PageNumberMask; //
//mux2 #(1) mixmux[P.PPN_BITS-1:0](ZeroExtendedVPN, PPN, PageNumberMask, PPNMixed);
//assign PPNMixed = (ZeroExtendedVPN & ~PageNumberMask) | (PPN & PageNumberMask);
assign PPNMixed = PPN | ZeroExtendedVPN & PageNumberMask; // low bits of PPN are already zero
// In Svnapot, when N=1, use bottom bits of VPN for contiugous translations
if (P.SVNAPOT_SUPPORTED) begin
// 64 KiB contiguous NAPOT translations suported
logic [3:0] PPNMixedBot;
mux2 #(4) napotmux(PPNMixed[3:0], VPN[3:0], PTE_N, PPNMixedBot);
assign PPNMixed2 = {PPNMixed[P.PPN_BITS-1:4], PPNMixedBot};
/* // Generalized NAPOT implementation supporting various sized contiguous regions
// This would also require a priority encoder in the tlbcam
// Not yet tested
logic [8:0] NAPOTMask, NAPOTPN, PPNMixedBot;
always_comb begin
casez(PPN[8:0])
9'b100000000: NAPOTMask = 9'b111111111;
9'b?10000000: NAPOTMask = 9'b011111111;
9'b??1000000: NAPOTMask = 9'b001111111;
9'b???100000: NAPOTMask = 9'b000111111;
9'b????10000: NAPOTMask = 9'b000011111;
9'b?????1000: NAPOTMask = 9'b000001111;
9'b??????100: NAPOTMask = 9'b000000111;
9'b???????10: NAPOTMask = 9'b000000011;
9'b????????1: NAPOTMask = 9'b000000001;
default: NAPOTMask = 9'b000000000;
endcase
end
// check malformed NAPOT PPN, which should cause page fault
// Replace PPN with VPN in lower bits of page number based on mask
assign NAPOTPN = VPN & NAPOTMask | PPN & ~NAPOTMask;
mux2 #(9) napotmux(PPNMixed[8:0], NAPOTPN, PTE_N, PPNMixedBot);
assign PPNMixed2 = {PPNMixed[PPN_BITS-1:9], PPNMixedBot}; */
end else begin // no Svnapot
assign PPNMixed2 = PPNMixed;
end
// Output the hit physical address if translation is currently on.
// Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal
mux2 #(P.PA_BITS) hitmux('0, {PPNMixed, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system
mux2 #(P.PA_BITS) hitmux('0, {PPNMixed2, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system
endmodule

View File

@ -32,23 +32,24 @@
module tlbram import cvw::*; #(parameter cvw_t P,
parameter TLB_ENTRIES = 8) (
input logic clk, reset,
input logic [P.XLEN-1:0] PTE,
input logic [P.XLEN-1:0] PTE,
input logic [TLB_ENTRIES-1:0] Matches, WriteEnables,
output logic [P.PPN_BITS-1:0] PPN,
output logic [7:0] PTEAccessBits,
output logic [TLB_ENTRIES-1:0] PTE_Gs
output logic [P.PPN_BITS-1:0] PPN,
output logic [10:0] PTEAccessBits,
output logic [TLB_ENTRIES-1:0] PTE_Gs,
output logic [TLB_ENTRIES-1:0] PTE_NAPOTs // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000)
);
logic [P.PPN_BITS+9:0] RamRead[TLB_ENTRIES-1:0];
logic [P.PPN_BITS+9:0] PageTableEntry;
logic [P.XLEN-1:0] RamRead[TLB_ENTRIES-1:0]; // stores the page table entries
logic [P.XLEN-1:0] PageTableEntry;
// RAM implemented with array of flops and AND/OR read logic
tlbramline #(P.PPN_BITS+10) tlbramline[TLB_ENTRIES-1:0]
tlbramline #(P) tlbramline[TLB_ENTRIES-1:0]
(.clk, .reset, .re(Matches), .we(WriteEnables),
.d(PTE[P.PPN_BITS+9:0]), .q(RamRead), .PTE_G(PTE_Gs));
or_rows #(TLB_ENTRIES, P.PPN_BITS+10) PTEOr(RamRead, PageTableEntry);
.d(PTE), .q(RamRead), .PTE_G(PTE_Gs), .PTE_NAPOT(PTE_NAPOTs));
or_rows #(TLB_ENTRIES, P.XLEN) PTEOr(RamRead, PageTableEntry);
// Rename the bits read from the TLB RAM
assign PTEAccessBits = PageTableEntry[7:0];
assign PTEAccessBits = {PageTableEntry[P.XLEN-1:P.XLEN-3] & {3{P.XLEN == 64}}, PageTableEntry[7:0]}; // include N and PBMT bits
assign PPN = PageTableEntry[P.PPN_BITS+9:10];
endmodule

View File

@ -26,16 +26,26 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module tlbramline #(parameter WIDTH = 22)
(input logic clk, reset,
input logic re, we,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q,
output logic PTE_G);
module tlbramline import cvw::*; #(parameter cvw_t P)
(input logic clk, reset,
input logic re, we,
input logic [P.XLEN-1:0] d,
output logic [P.XLEN-1:0] q,
output logic PTE_G,
output logic PTE_NAPOT // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000)
);
logic [WIDTH-1:0] line;
logic [P.XLEN-1:0] line;
if (P.XLEN == 64) begin // save 7 reserved bits
// could optimize out N and PBMT from d[63:61] if they aren't supported
logic [56:0] ptereg;
flopenr #(57) pteflop(clk, reset, we, {d[63:61], d[53:0]}, ptereg);
assign line = {ptereg[56:54], 7'b0, ptereg[53:0]};
end else // rv32
flopenr #(P.XLEN) pteflop(clk, reset, we, d, line);
flopenr #(WIDTH) pteflop(clk, reset, we, d, line);
assign q = re ? line : 0;
assign PTE_G = line[5]; // send global bit to CAM as part of ASID matching
assign PTE_NAPOT = P.SVNAPOT_SUPPORTED & line[P.XLEN-1] & (line[13:10] == 4'b1000); // send NAPOT bit to CAM as part of matching lsbs of VPN
endmodule

View File

@ -85,6 +85,7 @@ module csr import cvw::*; #(parameter cvw_t P) (
output var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0],
output logic [2:0] FRM_REGW,
output logic [3:0] ENVCFG_CBE,
output logic ENVCFG_PBMTE, // Page-based memory type enable
//
output logic [P.XLEN-1:0] CSRReadValW, // value read from CSR
output logic [P.XLEN-1:0] UnalignedPCNextF, // Next PC, accounting for traps and returns
@ -127,7 +128,6 @@ module csr import cvw::*; #(parameter cvw_t P) (
logic [63:0] MENVCFG_REGW;
logic [P.XLEN-1:0] SENVCFG_REGW;
logic ENVCFG_STCE; // supervisor timer counter enable
logic ENVCFG_PBMTE; // page-based memory types enable
logic ENVCFG_FIOM; // fence implies io (presently not used)
// only valid unflushed instructions can access CSRs

View File

@ -60,7 +60,7 @@ module csri import cvw::*; #(parameter cvw_t P) (
// SSIP is writable in SIP if S mode exists
if (P.S_SUPPORTED) begin:mask
if (P.SSTC_SUPPORTED) begin
assign MIP_WRITE_MASK = 12'h202; // SEIP and SSIP are writable, but STIP is not writable when STIMECMP is implemented (see SSTC spec)
assign MIP_WRITE_MASK = ENVCFG_STCE ? 12'h202 : 12'h222; // SEIP and SSIP are writable, but STIP is not writable when STIMECMP is implemented (see SSTC spec)
assign STIP = ENVCFG_STCE ? STimerInt : MIP_REGW_writeable[5];
end else begin
assign MIP_WRITE_MASK = 12'h222; // SEIP, STIP, SSIP are writeable in MIP (20210108-draft 3.1.9)

View File

@ -90,7 +90,7 @@ module csrs import cvw::*; #(parameter cvw_t P) (
assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL));
if(P.XLEN == 64) begin
logic LegalSatpModeM;
assign LegalSatpModeM = P.VIRTMEM_SUPPORTED & (CSRWriteValM[63:60] == 0 | CSRWriteValM[63:60] == 8 | CSRWriteValM[63:60] == 9); // supports SV39 and 48
assign LegalSatpModeM = P.VIRTMEM_SUPPORTED & (CSRWriteValM[63:60] == 0 | CSRWriteValM[63:60] == P.SV39 | CSRWriteValM[63:60] == P.SV48); // supports SV39 and 48
assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM) & LegalSatpModeM;
end else // RV32
assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM) & P.VIRTMEM_SUPPORTED;

View File

@ -83,6 +83,7 @@ module privileged import cvw::*; #(parameter cvw_t P) (
output var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], // PMP address entries to MMU
output logic [2:0] FRM_REGW, // FPU rounding mode
output logic [3:0] ENVCFG_CBE, // Cache block operation enables
output logic ENVCFG_PBMTE, // Page-based memory type enable
// PC logic output in privileged unit
output logic [P.XLEN-1:0] UnalignedPCNextF, // Next PC from trap/return PC logic
// control outputs
@ -137,7 +138,7 @@ module privileged import cvw::*; #(parameter cvw_t P) (
.STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS,
.MEDELEG_REGW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW,
.SATP_REGW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.SetFflagsM, .FRM_REGW, .ENVCFG_CBE,
.SetFflagsM, .FRM_REGW, .ENVCFG_CBE, .ENVCFG_PBMTE,
.CSRReadValW,.UnalignedPCNextF, .IllegalCSRAccessM, .BigEndianM);
// pipeline early-arriving trap sources

View File

@ -78,6 +78,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD;
logic SquashSCW;
logic MDUActiveE; // Mul/Div instruction being executed
logic ENVCFG_PBMTE; // Page-based memory type enable
logic [3:0] ENVCFG_CBE; // Cache Block operation enables
logic [3:0] CMOpM; // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero
logic IFUPrefetchE, LSUPrefetchM; // instruction / data prefetch hints
@ -184,7 +185,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM,
// mmu management
.PrivilegeModeW, .PTE, .PageType, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV,
.STATUS_MPP, .ITLBWriteF, .sfencevmaM, .ITLBMissF,
.STATUS_MPP, .ENVCFG_PBMTE, .ITLBWriteF, .sfencevmaM, .ITLBMissF,
// pmp/pma (inside mmu) signals.
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrUpdateDAF);
@ -231,7 +232,8 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.STATUS_MXR, // from csr
.STATUS_SUM, // from csr
.STATUS_MPRV, // from csr
.STATUS_MPP, // from csr
.STATUS_MPP, // from csr
.ENVCFG_PBMTE, // from csr
.sfencevmaM, // connects to privilege
.DCacheStallM, // connects to privilege
.LoadPageFaultM, // connects to privilege
@ -294,7 +296,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.PrivilegeModeW, .SATP_REGW,
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .STATUS_FS,
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.FRM_REGW, .ENVCFG_CBE, .BreakpointFaultM, .EcallFaultM, .wfiM, .IntPendingM, .BigEndianM);
.FRM_REGW, .ENVCFG_CBE, .ENVCFG_PBMTE, .BreakpointFaultM, .EcallFaultM, .wfiM, .IntPendingM, .BigEndianM);
end else begin
assign CSRReadValW = 0;
assign UnalignedPCNextF = PC2NextF;

View File

@ -58,9 +58,11 @@ module riscvassertions import cvw::*; #(parameter cvw_t P);
assert ((P.ZMMUL_SUPPORTED == 0) || (P.M_SUPPORTED ==0)) else $error("At most one of ZMMUL_SUPPORTED and M_SUPPORTED can be enabled");
assert ((P.ZICNTR_SUPPORTED == 0) || (P.ZICSR_SUPPORTED == 1)) else $error("ZICNTR_SUPPORTED requires ZICSR_SUPPORTED");
assert ((P.ZIHPM_SUPPORTED == 0) || (P.ZICNTR_SUPPORTED == 1)) else $error("ZIPHM_SUPPORTED requires ZICNTR_SUPPORTED");
assert ((P.ZICBOM_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOM required DCACHE_SUPPORTED");
assert ((P.ZICBOZ_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOZ required DCACHE_SUPPORTED");
assert ((P.ZICBOP_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOP required DCACHE_SUPPORTED");
assert ((P.ZICBOM_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOM requires DCACHE_SUPPORTED");
assert ((P.ZICBOZ_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOZ requires DCACHE_SUPPORTED");
assert ((P.ZICBOP_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOP requires DCACHE_SUPPORTED");
assert ((P.SVPBMT_SUPPORTED == 0) || (P.VIRTMEM_SUPPORTED == 1 && P.XLEN==64)) else $error("SVPBMT requires VIRTMEM_SUPPORTED and RV64");
assert ((P.SVNAPOT_SUPPORTED == 0) || (P.VIRTMEM_SUPPORTED == 1 && P.XLEN==64)) else $error("SVNAPOT requires VIRTMEM_SUPPORTED and RV64");
end
endmodule

View File

@ -1936,6 +1936,7 @@ string arch64zbs[] = '{
string wally64priv[] = '{
`WALLYTEST,
"rv64i_m/privilege/src/WALLY-csr-permission-s-01.S",
"rv64i_m/privilege/src/WALLY-cboz-01.S",
"rv64i_m/privilege/src/WALLY-cbom-01.S",
"rv64i_m/privilege/src/WALLY-csr-permission-u-01.S",
"rv64i_m/privilege/src/WALLY-mie-01.S",
@ -2030,6 +2031,7 @@ string arch64zbs[] = '{
"rv32i_m/privilege/src/WALLY-csr-permission-s-01.S",
"rv32i_m/privilege/src/WALLY-csr-permission-u-01.S",
"rv32i_m/privilege/src/WALLY-cbom-01.S",
"rv32i_m/privilege/src/WALLY-cboz-01.S",
"rv32i_m/privilege/src/WALLY-mie-01.S",
"rv32i_m/privilege/src/WALLY-minfo-01.S",
"rv32i_m/privilege/src/WALLY-misa-01.S",

View File

@ -72,7 +72,7 @@ sretdone:
li a0, 3
ecall # in M-mode
li t0, 32
csrs sip, t0
csrs mip, t0
li a0, 1
ecall # in S-mode and expects stimer interrupt to occur
li a0, 3

View File

@ -58,6 +58,7 @@ target_tests_nosim = \
WALLY-plic-01 \
WALLY-uart-01 \
WALLY-cbom-01 \
WALLY-cboz-01 \
rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests))

View File

@ -0,0 +1,428 @@
deadbeef # begin_signature
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef # destination 1
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
00000000 # destination 2
00000001
00000002
00000003
00000004
00000005
00000006
00000007
00000008
00000009
0000000a
0000000b
0000000c
0000000d
0000000e
0000000f
00000010
00000011
00000012
00000013
00000014
00000015
00000016
00000017
00000018
00000019
0000001a
0000001b
0000001c
0000001d
0000001e
0000001f
00000020
00000021
00000022
00000023
00000024
00000025
00000026
00000027
00000028
00000029
0000002a
0000002b
0000002c
0000002d
0000002e
0000002f
00000030
00000031
00000032
00000033
00000034
00000035
00000036
00000037
00000038
00000039
0000003a
0000003b
0000003c
0000003d
0000003e
0000003f
00000040
00000041
00000042
00000043
00000044
00000045
00000046
00000047
00000048
00000049
0000004a
0000004b
0000004c
0000004d
0000004e
0000004f
00000050
00000051
00000052
00000053
00000054
00000055
00000056
00000057
00000058
00000059
0000005a
0000005b
0000005c
0000005d
0000005e
0000005f
00000060
00000061
00000062
00000063
00000064
00000065
00000066
00000067
00000068
00000069
0000006a
0000006b
0000006c
0000006d
0000006e
0000006f
00000070
00000071
00000072
00000073
00000074
00000075
00000076
00000077
00000078
00000079
0000007a
0000007b
0000007c
0000007d
0000007e
0000007f
00000000 # destination 3
00000001
00000002
00000003
00000004
00000005
00000006
00000007
00000008
00000009
0000000a
0000000b
0000000c
0000000d
0000000e
0000000f
00000010
00000011
00000012
00000013
00000014
00000015
00000016
00000017
00000018
00000019
0000001a
0000001b
0000001c
0000001d
0000001e
0000001f
00000020
00000021
00000022
00000023
00000024
00000025
00000026
00000027
00000028
00000029
0000002a
0000002b
0000002c
0000002d
0000002e
0000002f
00000030
00000031
00000032
00000033
00000034
00000035
00000036
00000037
00000038
00000039
0000003a
0000003b
0000003c
0000003d
0000003e
0000003f
00000040
00000041
00000042
00000043
00000044
00000045
00000046
00000047
00000048
00000049
0000004a
0000004b
0000004c
0000004d
0000004e
0000004f
00000050
00000051
00000052
00000053
00000054
00000055
00000056
00000057
00000058
00000059
0000005a
0000005b
0000005c
0000005d
0000005e
0000005f
00000060
00000061
00000062
00000063
00000064
00000065
00000066
00000067
00000068
00000069
0000006a
0000006b
0000006c
0000006d
0000006e
0000006f
00000070
00000071
00000072
00000073
00000074
00000075
00000076
00000077
00000078
00000079
0000007a
0000007b
0000007c
0000007d
0000007e
0000007f
ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies.
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
0bad0bad # controls
0bad0bad
0bad0bad

View File

@ -0,0 +1,188 @@
deadbeef # begin_signature
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
00000000 # destination 1
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000 # destination 2
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies.
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
0bad0bad
0bad0bad

View File

@ -0,0 +1,472 @@
///////////////////////////////////////////
//
// WALLY-cache-management-tests
// invalidate, clean, and flush
//
// Author: Rose Thompson <ross1728@gmail.com>
//
// Created 18 August 2023
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
# Purpose: Tests the 3 Zicbom cache instructions which all operate on cacheline
# granularity blocks of memory. Invalidate: Clears valid and dirty bits
# and does not write back. Clean: Writes back dirty cacheline if needed
# and clears dirty bit. Does NOT clear valid bit. Flush: Cleans and then
# Invalidates. These operations apply to all caches in the memory system.
# The tests are divided into three parts one for the data cache, instruction cache
# and checks to verify the uncached regions of memory cause exceptions.
# -----------
# Copyright (c) 2020. RISC-V International. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# -----------
#
# This assembly file tests the cbo.inval, cbo.clean, and cbo.flush instructions of the RISC-V Zicbom extension.
#
#include "model_test.h"
#include "arch_test.h"
RVTEST_ISA("RV32I_Zicbom")
# Test code region
.section .text.init
.globl rvtest_entry_point
rvtest_entry_point:
RVMODEL_BOOT
RVTEST_CODE_BEGIN
RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.inval)
RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n")
CBOMTest:
# *** TODO
# first need to discover the length of the cacheline.
# for now assume it is 64 bytes
#addi sp, sp, -16
#sd s0, 0(sp)
#sd ra, 8(sp)
la s0, signature
################################################################################
# INVALIDATE D$
################################################################################
# theory of operation
# 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
# 2. Then verify the second region has the same data
# 3. Invalidate the second region
# 4. Verify the second region has the original invalid data
# DON'T batch each step. We want to see the transition between cachelines. The current should be invalidated
# but the next should have the copied data.
# step 1
CBOMTest_inval_step1:
la a0, SourceData
la a1, Destination1
li a2, 128
jal ra, memcpy4
# step 2
CBOMTest_inval_step2:
la a0, SourceData
la a1, Destination1
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 3
CBOMTest_inval_step3:
la a1, Destination1
cbo.inval (a1)
# step 4 (should be Invalid)
la a0, DeadBeafData1
la a1, Destination1
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 4 next line (should still be valid)
CBOMTest_inval_step4:
la a0, SourceData+64
la a1, Destination1+64
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 3 (Invalidate all remaining lines)
CBOMTest_inval_step3_all:
la a1, Destination1+64
cbo.inval (a1)
cbo.inval (a1) # verify invalidating an already non present line does not cause an issue.
la a1, Destination1+128
cbo.inval (a1)
la a1, Destination1+192
cbo.inval (a1)
la a1, Destination1+256
cbo.inval (a1)
la a1, Destination1+320
cbo.inval (a1)
la a1, Destination1+384
cbo.inval (a1)
la a1, Destination1+448
cbo.inval (a1)
# step 4 All should be invalid
CBOMTest_inval_step4_all:
la a0, DeadBeafData1
la a1, Destination1
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
################################################################################
# Clean D$
################################################################################
# theory of operation
# 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
# 2. Then verify the second region has the same data
# 3. Invalidate the second region
# 4. Verify the second region has the original invalid data
# 5. Repeat step 1
# 6. Clean cachelines
# 7. Verify the second region has the same data
# 8. Invalidate the second region
# 9. Verify again but this time it should contain the same data
# DON'T batch each step. We want to see the transition between cachelines. The current should be invalidated
# but the next should have the copied data.
# step 1
CBOMTest_clean_step1:
la a0, SourceData
la a1, Destination2
li a2, 128
jal ra, memcpy4
# step 2
CBOMTest_clean_step2:
la a0, SourceData
la a1, Destination2
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 3
CBOMTest_clean_step3:
la a1, Destination2
cbo.inval (a1)
la a1, Destination2+64
cbo.inval (a1)
la a1, Destination2+128
cbo.inval (a1)
la a1, Destination2+192
cbo.inval (a1)
la a1, Destination2+256
cbo.inval (a1)
la a1, Destination2+320
cbo.inval (a1)
la a1, Destination2+384
cbo.inval (a1)
la a1, Destination2+448
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
cbo.inval (a1)
# step 4 All should be invalid
CBOMTest_clean_step4:
la a0, DeadBeafData1
la a1, Destination2
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 5
CBOMTest_clean_step5:
la a0, SourceData
la a1, Destination2
li a2, 128
jal ra, memcpy4
# step 6 only clean 1 line
CBOMTest_clean_step6:
la a1, Destination2
cbo.clean (a1)
# step 7 only check that 1 line
CBOMTest_clean_step7:
la a0, SourceData
la a1, Destination2
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 8 invalidate that 1 line and the next
CBOMTest_clean_step8:
la a1, Destination2
cbo.inval (a1)
la a1, Destination2+64
cbo.inval (a1)
# step 9 that 1 line should contain the valid data
CBOMTest_clean_step9_line1:
la a0, SourceData
la a1, Destination2
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 9 the next should contain the invalid data
CBOMTest_clean_step9_line2:
la a0, DeadBeafData1
la a1, Destination2+64
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 5 # now recopy the one we just corrupted
CBOMTest_clean_step5_recopy_line2:
la a0, SourceData+64
la a1, Destination2+64
li a2, 16
jal ra, memcpy4
# step 6 # clean the remaining
CBOMTest_clean_step6_clean_all:
la a1, Destination2+64
cbo.clean (a1)
la a1, Destination2+128
cbo.clean (a1)
la a1, Destination2+192
cbo.clean (a1)
la a1, Destination2+256
cbo.clean (a1)
la a1, Destination2+320
cbo.clean (a1)
la a1, Destination2+384
cbo.clean (a1)
la a1, Destination2+448
cbo.clean (a1)
cbo.clean (a1)
cbo.clean (a1)
cbo.clean (a1)
cbo.clean (a1)
cbo.clean (a1)
cbo.clean (a1)
cbo.clean (a1)
cbo.clean (a1)
# step 8 # invalidate all remaining
CBOMTest_clean_step7_invalidate_all:
la a1, Destination2
cbo.inval (a1)
la a1, Destination2+64
cbo.inval (a1)
la a1, Destination2+128
cbo.inval (a1)
la a1, Destination2+192
cbo.inval (a1)
la a1, Destination2+256
cbo.inval (a1)
la a1, Destination2+320
cbo.inval (a1)
la a1, Destination2+384
cbo.inval (a1)
la a1, Destination2+448
cbo.inval (a1)
# step 9 # check all
CBOMTest_clean_step9_check_all:
la a0, SourceData
la a1, Destination2
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
################################################################################
# Flush D$ line
################################################################################
# theory of operation
# 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
# 2. Then verify the second region has the same data
# 3. For flush there is no way to create a negative control. We will flush 1 cache line
# 4. Verify whole region
# 5. Flush the remaining lines
# 6. Verify whole region
# step 1
CBOMTest_flush_step1:
la a0, SourceData
la a1, Destination3
li a2, 128
jal ra, memcpy4
# step 2 All should be valid
CBOMTest_flush_step2_verify:
la a0, SourceData
la a1, Destination3
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 3 # flush 1 line
CBOMTest_flush_step3:
la a1, Destination3
cbo.flush (a1)
# step 4
CBOMTest_flush_step4_verify:
la a0, SourceData
la a1, Destination3
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 5
CBOMTest_flush_step5_flush_all:
la a1, Destination3
cbo.flush (a1)
la a1, Destination3+64
cbo.flush (a1)
la a1, Destination3+128
cbo.flush (a1)
la a1, Destination3+192
cbo.flush (a1)
la a1, Destination3+256
cbo.flush (a1)
la a1, Destination3+320
cbo.flush (a1)
la a1, Destination3+384
cbo.flush (a1)
la a1, Destination3+448
cbo.flush (a1)
cbo.flush (a1)
cbo.flush (a1)
cbo.flush (a1)
cbo.flush (a1)
cbo.flush (a1)
# step 6
CBOMTest_flush_step6_verify:
la a0, SourceData
la a1, Destination3
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
#lw s0, 0(sp)
#lw ra, 8(sp)
#addi sp, sp, 16
#ret
RVMODEL_HALT
.type memcpy4, @function
memcpy4:
# a0 is the source
# a1 is the dst
# a2 is the number of 4 byte words
mv t0, a0
mv t1, a1
li t2, 0
memcpy4_loop:
lw t3, 0(t0)
sw t3, 0(t1)
addi t0, t0, 4
addi t1, t1, 4
addi t2, t2, 1
blt t2, a2, memcpy4_loop
ret
.type memcmp4, @function
# returns which index mismatch, -1 if none
memcmp4:
# a0 is the source1
# a1 is the source2
# a2 is the number of 4 byte words
mv t0, a0
mv t1, a1
li t2, 0
memcmp4_loop:
lw t3, 0(t0)
lw t4, 0(t1)
bne t3, t4, memcmp4_ne
addi t0, t0, 4
addi t1, t1, 4
addi t2, t2, 1
blt t2, a2, memcmp4_loop
li a0, -1
ret
memcmp4_ne:
mv a0, t2
ret
RVTEST_CODE_END
RVTEST_DATA_BEGIN
# Input data section.
#.data
.align 7
DeadBeafData1:
.fill 128, 4, 0xdeadbeef
SourceData:
.int 0, 1, 2, 3, 4, 5, 6, 7
.int 8, 9, 10, 11, 12, 13, 14, 15
.int 16, 17, 18, 19, 20, 21, 22, 23
.int 24, 25, 26, 27, 28, 29, 30, 31
.int 32, 33, 34, 35, 36, 37, 38, 39
.int 40, 41, 42, 43, 44, 45, 46, 47
.int 48, 49, 50, 51, 52, 53, 54, 55
.int 56, 57, 58, 59, 60, 61, 62, 63
.int 64, 65, 66, 67, 68, 69, 70, 71
.int 72, 73, 74, 75, 76, 77, 78, 79
.int 80, 81, 82, 83, 84, 85, 86, 87
.int 88, 89, 90, 91, 92, 93, 94, 95
.int 96, 97, 98, 99, 100, 101, 102, 103
.int 104, 105, 106, 107, 108, 109, 110, 111
.int 112, 113, 114, 115, 116, 117, 118, 119
.int 120, 121, 122, 123, 124, 125, 126, 127
RVTEST_DATA_END
RVMODEL_DATA_BEGIN
.fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert
# 4 bytes. This needs to be aligned to a cacheline
.align 6
Destination1:
.fill 128, 4, 0xdeadbeef
Destination2:
.fill 128, 4, 0xdeadbeef
Destination3:
.fill 128, 4, 0xdeadbeef
signature:
.fill 16, 4, 0x0bad0bad
RVMODEL_DATA_END

View File

@ -0,0 +1,377 @@
///////////////////////////////////////////
//
// WALLY-cache-management-tests
// invalidate, clean, and flush
//
// Author: Rose Thompson <ross1728@gmail.com>
//
// Created 22 August 2023
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
# Purpose: Tests the Zicboz cache instruction which all operate on cacheline
# granularity blocks of memory. The instruction cbo.zero allocates a cacheline
# and writes 0 to each byte. A dirty cacheline is overwritten, any data in main
# memory is over written.
# -----------
# Copyright (c) 2020. RISC-V International. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# -----------
#
# This assembly file tests the cbo.inval, cbo.clean, and cbo.flush instructions of the RISC-V Zicbom extension.
#
#include "model_test.h"
#include "arch_test.h"
RVTEST_ISA("RV32I_Zicboz_Zicbom")
# Test code region
.section .text.init
.globl rvtest_entry_point
rvtest_entry_point:
RVMODEL_BOOT
RVTEST_CODE_BEGIN
RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.zero)
RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n")
CBOZTest:
# *** TODO
# first need to discover the length of the cacheline.
# for now assume it is 64 bytes
#addi sp, sp, -16
#sd s0, 0(sp)
#sd ra, 8(sp)
la s0, signature
################################################################################
# Zero cache line hit overwrites
################################################################################
# theory of operation
# 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
# 2. Then verify the second region has the same data
# 3. Zero that region of memory
# 4. Verify the second region is all zero.
# step 1
CBOZTest_zero_step1:
la a0, SourceData
la a1, Destination1
li a2, 128
jal ra, memcpy4
# step 2
CBOZTest_zero_step2:
la a0, SourceData
la a1, Destination1
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 3
CBOZTest_zero_step3:
la a1, Destination1
cbo.zero (a1)
la a1, Destination1+64
cbo.zero (a1)
la a1, Destination1+128
cbo.zero (a1)
la a1, Destination1+192
cbo.zero (a1)
la a1, Destination1+256
cbo.zero (a1)
la a1, Destination1+320
cbo.zero (a1)
la a1, Destination1+384
cbo.zero (a1)
la a1, Destination1+448
cbo.zero (a1)
CBOZTest_zero_step4:
# step 4 (should be zero)
la a0, ZeroData
la a1, Destination1
li a2, 128
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
################################################################################
# Verify cbo.zero miss overwrites
################################################################################
# theory of operation
# 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory
# 2. Then verify the second region has the same data
# 3. Flush that one line
# 4. Zero that one line
# 5. Verify the second region is zero
# step 1
CBOZTest_miss_zero_step1:
la a0, SourceData
la a1, Destination1
li a2, 16
jal ra, memcpy4
# step 2
CBOZTest_miss_zero_step2:
la a0, SourceData
la a1, Destination1
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 3
CBOZTest_miss_zero_step3:
la a1, Destination1
cbo.flush (a1)
cbo.zero (a1)
CBOZTest_miss_zero_step4:
# step 4 (should be Invalid)
la a0, ZeroData
la a1, Destination1
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
################################################################################
# Verify cbo.zero miss with eviction overwrites
################################################################################
# theory of operation
# 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory
# 2. Repeate 1 four times at 4KiB intervals
# 2. Then verify the second region has the same data
# 4. Zero each line
# 5. Verify the second region is zero
# step 1
CBOZTest_eviction_zero_step1_0:
la a0, SourceData
la a1, Destination2
li a2, 16
jal ra, memcpy4
CBOZTest_eviction_zero_step2_4096:
la a0, SourceData+8
la a1, Destination2+4096
li a2, 16
jal ra, memcpy4
CBOZTest_eviction_zero_step2_8192:
la a0, SourceData+16
la a1, Destination2+8192
li a2, 16
jal ra, memcpy4
CBOZTest_eviction_zero_step2_12288:
la a0, SourceData+24
la a1, Destination2+12288
li a2, 16
jal ra, memcpy4
CBOZTest_eviction_zero_step2_16384:
la a0, SourceData+32
la a1, Destination2+16384
li a2, 16
jal ra, memcpy4
# step 3
CBOZTest_eviction_zero_step3_0:
la a0, SourceData
la a1, Destination2
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step3_4096:
la a0, SourceData+8
la a1, Destination2+4096
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step3_8192:
la a0, SourceData+16
la a1, Destination2+8192
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step3_12288:
la a0, SourceData+24
la a1, Destination2+12288
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step3_16384:
la a0, SourceData+32
la a1, Destination2+16384
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
# step 4
CBOZTest_eviction_zero_step4:
la a1, Destination2
cbo.zero (a1)
la a1, Destination2+4096
cbo.zero (a1)
la a1, Destination2+8192
cbo.zero (a1)
la a1, Destination2+12288
cbo.zero (a1)
la a1, Destination2+16384
cbo.zero (a1)
CBOZTest_eviction_zero_step5_0:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step5_4096:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+4096
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step5_8192:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+8192
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step5_12288:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+12288
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
CBOZTest_eviction_zero_step5_16384:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+16384
li a2, 16
jal ra, memcmp4
sw a0, 0(s0) # should be -1
addi s0, s0, 4
#ld s0, 0(sp)
#ld ra, 8(sp)
#addi sp, sp, 16
#ret
RVMODEL_HALT
.type memcpy4, @function
memcpy4:
# a0 is the source
# a1 is the dst
# a2 is the number of 4 byte words
mv t0, a0
mv t1, a1
li t2, 0
memcpy4_loop:
lw t3, 0(t0)
sw t3, 0(t1)
addi t0, t0, 4
addi t1, t1, 4
addi t2, t2, 1
blt t2, a2, memcpy4_loop
ret
.type memcmp4, @function
# returns which index mismatch, -1 if none
memcmp4:
# a0 is the source1
# a1 is the source2
# a2 is the number of 4 byte words
mv t0, a0
mv t1, a1
li t2, 0
memcmp4_loop:
lw t3, 0(t0)
lw t4, 0(t1)
bne t3, t4, memcmp4_ne
addi t0, t0, 4
addi t1, t1, 4
addi t2, t2, 1
blt t2, a2, memcmp4_loop
li a0, -1
ret
memcmp4_ne:
mv a0, t2
ret
RVTEST_CODE_END
RVTEST_DATA_BEGIN
# Input data section.
#.data
.align 7
ZeroData:
.fill 128, 4, 0x0
SourceData:
.int 0, 1, 2, 3, 4, 5, 6, 7
.int 8, 9, 10, 11, 12, 13, 14, 15
.int 16, 17, 18, 19, 20, 21, 22, 23
.int 24, 25, 26, 27, 28, 29, 30, 31
.int 32, 33, 34, 35, 36, 37, 38, 39
.int 40, 41, 42, 43, 44, 45, 46, 47
.int 48, 49, 50, 51, 52, 53, 54, 55
.int 56, 57, 58, 59, 60, 61, 62, 63
.int 64, 65, 66, 67, 68, 69, 70, 71
.int 72, 73, 74, 75, 76, 77, 78, 79
.int 80, 81, 82, 83, 84, 85, 86, 87
.int 88, 89, 90, 91, 92, 93, 94, 95
.int 96, 97, 98, 99, 100, 101, 102, 103
.int 104, 105, 106, 107, 108, 109, 110, 111
.int 112, 113, 114, 115, 116, 117, 118, 119
.int 120, 121, 122, 123, 124, 125, 126, 127
RVTEST_DATA_END
RVMODEL_DATA_BEGIN
.fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert
# 4 bytes. This needs to be aligned to a cacheline
.align 6
Destination1:
.fill 128, 4, 0xdeadbeef
Destination2:
.fill 16, 4, 0xdeadbeef
signature:
.fill 16, 4, 0x0bad0bad
RVMODEL_DATA_END

View File

@ -57,6 +57,7 @@ target_tests_nosim = \
WALLY-status-fp-enabled-01 \
WALLY-wfi-01 \
WALLY-cbom-01 \
WALLY-cboz-01 \
# unclear why status-fp-enabled and wfi aren't simulating ok

View File

@ -0,0 +1,204 @@
deadbeef # begin_signature
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
deadbeef
00000000 # destination 1
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000 # destination 2
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies.
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
ffffffff
0bad0bad
0bad0bad
0bad0bad
0bad0bad

View File

@ -0,0 +1,377 @@
///////////////////////////////////////////
//
// WALLY-cache-management-tests
// invalidate, clean, and flush
//
// Author: Rose Thompson <ross1728@gmail.com>
//
// Created 22 August 2023
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
# Purpose: Tests the Zicboz cache instruction which all operate on cacheline
# granularity blocks of memory. The instruction cbo.zero allocates a cacheline
# and writes 0 to each byte. A dirty cacheline is overwritten, any data in main
# memory is over written.
# -----------
# Copyright (c) 2020. RISC-V International. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# -----------
#
# This assembly file tests the cbo.inval, cbo.clean, and cbo.flush instructions of the RISC-V Zicbom extension.
#
#include "model_test.h"
#include "arch_test.h"
RVTEST_ISA("RV64I_Zicboz_Zicbom")
# Test code region
.section .text.init
.globl rvtest_entry_point
rvtest_entry_point:
RVMODEL_BOOT
RVTEST_CODE_BEGIN
RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.zero)
RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n")
CBOZTest:
# *** TODO
# first need to discover the length of the cacheline.
# for now assume it is 64 bytes
#addi sp, sp, -16
#sd s0, 0(sp)
#sd ra, 8(sp)
la s0, signature
################################################################################
# Zero cache line hit overwrites
################################################################################
# theory of operation
# 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory
# 2. Then verify the second region has the same data
# 3. Zero that region of memory
# 4. Verify the second region is all zero.
# step 1
CBOZTest_zero_step1:
la a0, SourceData
la a1, Destination1
li a2, 64
jal ra, memcpy8
# step 2
CBOZTest_zero_step2:
la a0, SourceData
la a1, Destination1
li a2, 64
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
# step 3
CBOZTest_zero_step3:
la a1, Destination1
cbo.zero (a1)
la a1, Destination1+64
cbo.zero (a1)
la a1, Destination1+128
cbo.zero (a1)
la a1, Destination1+192
cbo.zero (a1)
la a1, Destination1+256
cbo.zero (a1)
la a1, Destination1+320
cbo.zero (a1)
la a1, Destination1+384
cbo.zero (a1)
la a1, Destination1+448
cbo.zero (a1)
CBOZTest_zero_step4:
# step 4 (should be zero)
la a0, ZeroData
la a1, Destination1
li a2, 64
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
################################################################################
# Verify cbo.zero miss overwrites
################################################################################
# theory of operation
# 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory
# 2. Then verify the second region has the same data
# 3. Flush that one line
# 4. Zero that one line
# 5. Verify the second region is zero
# step 1
CBOZTest_miss_zero_step1:
la a0, SourceData
la a1, Destination1
li a2, 8
jal ra, memcpy8
# step 2
CBOZTest_miss_zero_step2:
la a0, SourceData
la a1, Destination1
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
# step 3
CBOZTest_miss_zero_step3:
la a1, Destination1
cbo.flush (a1)
cbo.zero (a1)
CBOZTest_miss_zero_step4:
# step 4 (should be Invalid)
la a0, ZeroData
la a1, Destination1
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
################################################################################
# Verify cbo.zero miss with eviction overwrites
################################################################################
# theory of operation
# 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory
# 2. Repeate 1 four times at 4KiB intervals
# 2. Then verify the second region has the same data
# 4. Zero each line
# 5. Verify the second region is zero
# step 1
CBOZTest_eviction_zero_step1_0:
la a0, SourceData
la a1, Destination2
li a2, 8
jal ra, memcpy8
CBOZTest_eviction_zero_step2_4096:
la a0, SourceData+8
la a1, Destination2+4096
li a2, 8
jal ra, memcpy8
CBOZTest_eviction_zero_step2_8192:
la a0, SourceData+16
la a1, Destination2+8192
li a2, 8
jal ra, memcpy8
CBOZTest_eviction_zero_step2_12288:
la a0, SourceData+24
la a1, Destination2+12288
li a2, 8
jal ra, memcpy8
CBOZTest_eviction_zero_step2_16384:
la a0, SourceData+32
la a1, Destination2+16384
li a2, 8
jal ra, memcpy8
# step 3
CBOZTest_eviction_zero_step3_0:
la a0, SourceData
la a1, Destination2
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step3_4096:
la a0, SourceData+8
la a1, Destination2+4096
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step3_8192:
la a0, SourceData+16
la a1, Destination2+8192
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step3_12288:
la a0, SourceData+24
la a1, Destination2+12288
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step3_16384:
la a0, SourceData+32
la a1, Destination2+16384
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
# step 4
CBOZTest_eviction_zero_step4:
la a1, Destination2
cbo.zero (a1)
la a1, Destination2+4096
cbo.zero (a1)
la a1, Destination2+8192
cbo.zero (a1)
la a1, Destination2+12288
cbo.zero (a1)
la a1, Destination2+16384
cbo.zero (a1)
CBOZTest_eviction_zero_step5_0:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step5_4096:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+4096
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step5_8192:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+8192
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step5_12288:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+12288
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
CBOZTest_eviction_zero_step5_16384:
# step 5 (should be zero)
la a0, ZeroData
la a1, Destination2+16384
li a2, 8
jal ra, memcmp8
sd a0, 0(s0) # should be -1
addi s0, s0, 8
#ld s0, 0(sp)
#ld ra, 8(sp)
#addi sp, sp, 16
#ret
RVMODEL_HALT
.type memcpy8, @function
memcpy8:
# a0 is the source
# a1 is the dst
# a2 is the number of 8 byte words
mv t0, a0
mv t1, a1
li t2, 0
memcpy8_loop:
ld t3, 0(t0)
sd t3, 0(t1)
addi t0, t0, 8
addi t1, t1, 8
addi t2, t2, 1
blt t2, a2, memcpy8_loop
ret
.type memcmp8, @function
# returns which index mismatch, -1 if none
memcmp8:
# a0 is the source1
# a1 is the source2
# a2 is the number of 8 byte words
mv t0, a0
mv t1, a1
li t2, 0
memcmp8_loop:
ld t3, 0(t0)
ld t4, 0(t1)
bne t3, t4, memcmp8_ne
addi t0, t0, 8
addi t1, t1, 8
addi t2, t2, 1
blt t2, a2, memcmp8_loop
li a0, -1
ret
memcmp8_ne:
mv a0, t2
ret
RVTEST_CODE_END
RVTEST_DATA_BEGIN
# Input data section.
#.data
.align 7
ZeroData:
.fill 128, 4, 0x0
SourceData:
.int 0, 1, 2, 3, 4, 5, 6, 7
.int 8, 9, 10, 11, 12, 13, 14, 15
.int 16, 17, 18, 19, 20, 21, 22, 23
.int 24, 25, 26, 27, 28, 29, 30, 31
.int 32, 33, 34, 35, 36, 37, 38, 39
.int 40, 41, 42, 43, 44, 45, 46, 47
.int 48, 49, 50, 51, 52, 53, 54, 55
.int 56, 57, 58, 59, 60, 61, 62, 63
.int 64, 65, 66, 67, 68, 69, 70, 71
.int 72, 73, 74, 75, 76, 77, 78, 79
.int 80, 81, 82, 83, 84, 85, 86, 87
.int 88, 89, 90, 91, 92, 93, 94, 95
.int 96, 97, 98, 99, 100, 101, 102, 103
.int 104, 105, 106, 107, 108, 109, 110, 111
.int 112, 113, 114, 115, 116, 117, 118, 119
.int 120, 121, 122, 123, 124, 125, 126, 127
RVTEST_DATA_END
RVMODEL_DATA_BEGIN
.fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert
# 4 bytes. This needs to be aligned to a cacheline
.align 6
Destination1:
.fill 128, 4, 0xdeadbeef
Destination2:
.fill 16, 4, 0xdeadbeef
signature:
.fill 32, 4, 0x0bad0bad
RVMODEL_DATA_END