diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 6376904dc..38960c735 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -42,11 +42,12 @@ localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZFH_SUPPORTED = 0; -localparam SSTC_SUPPORTED = 0; -localparam ZICBOM_SUPPORTED = 0; -localparam ZICBOZ_SUPPORTED = 0; -localparam ZICBOP_SUPPORTED = 0; -localparam SVPBMT_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 1; +localparam ZICBOM_SUPPORTED = 1; +localparam ZICBOZ_SUPPORTED = 1; +localparam ZICBOP_SUPPORTED = 1; +localparam SVPBMT_SUPPORTED = 1; +localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; // LSU microarchitectural Features diff --git a/config/fpga/config.vh b/config/fpga/config.vh index 3551da010..7e582fabb 100644 --- a/config/fpga/config.vh +++ b/config/fpga/config.vh @@ -44,11 +44,12 @@ localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; -localparam SSTC_SUPPORTED = 0; -localparam ZICBOM_SUPPORTED = 0; -localparam ZICBOZ_SUPPORTED = 0; -localparam ZICBOP_SUPPORTED = 0; -localparam SVPBMT_SUPPORTED = 0; +localparam SSTC_SUPPORTED = 1; +localparam ZICBOM_SUPPORTED = 1; +localparam ZICBOZ_SUPPORTED = 1; +localparam ZICBOP_SUPPORTED = 1; +localparam SVPBMT_SUPPORTED = 1; +localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; // LSU microarchitectural Features diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index 83c4efd52..e1cbdab0f 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; +localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; // LSU microarchitectural Features diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index b08bf06d8..07f005f43 100644 --- 
a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -46,9 +46,10 @@ localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; -localparam ZICBOZ_SUPPORTED = 0; +localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; +localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; // LSU microarchitectural Features diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index 1897ddeb2..e1c5a6a5d 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; +localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; // LSU microarchitectural Features diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index 85d3597f9..a9123cbb4 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -47,6 +47,7 @@ localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; +localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; // LSU microarchitectural Features diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index f0c6cc6b2..2533dbc21 100644 --- a/config/rv64fpquad/config.vh +++ b/config/rv64fpquad/config.vh @@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; +localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; // LSU microarchitectural Features diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 939ce72c8..16e50b899 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -48,9 +48,10 @@ localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; 
-localparam ZICBOZ_SUPPORTED = 0; -localparam ZICBOP_SUPPORTED = 0; -localparam SVPBMT_SUPPORTED = 0; +localparam ZICBOZ_SUPPORTED = 1; +localparam ZICBOP_SUPPORTED = 1; +localparam SVPBMT_SUPPORTED = 1; +localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; // LSU microarchitectural Features diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index 3d7f33544..c27f7faf0 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -48,6 +48,7 @@ localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; +localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; // LSU microarchitectural Features diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 96ef1e929..54a6675ee 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -101,10 +101,10 @@ localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4)); localparam DURLEN = ($clog2(FPDUR+1)); localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b) localparam DIVBLEN = ($clog2(DIVb+1)-1); -localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu +localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result // largest length in IEU/FPU -localparam CVTLEN = ((NF(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? 
(DIVb + 1 +NF+1) : (3*NF+6))); diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index 6e01aabb4..f63028a92 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -25,6 +25,7 @@ parameter cvw_t P = '{ ZICBOZ_SUPPORTED : ZICBOZ_SUPPORTED, ZICBOP_SUPPORTED : ZICBOP_SUPPORTED, SVPBMT_SUPPORTED : SVPBMT_SUPPORTED, + SVNAPOT_SUPPORTED : SVNAPOT_SUPPORTED, SVINVAL_SUPPORTED : SVINVAL_SUPPORTED, BUS_SUPPORTED : BUS_SUPPORTED, DCACHE_SUPPORTED : DCACHE_SUPPORTED, diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 52b54029f..1714544ec 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -101,6 +101,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN/8-1:0] LineByteMask; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic ZeroCacheLine; + logic CMOZeroHit; logic [LINELEN-1:0] PreLineWriteData; genvar index; @@ -119,7 +120,7 @@ module cache import cvw::*; #(parameter cvw_t P, // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CMOp, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .VictimWay, + .SetValid, .ClearValid, .SetDirty, .ClearDirty, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches @@ -225,7 +226,7 @@ module cache import cvw::*; #(parameter cvw_t P, .FlushStage, .CacheRW, .CacheAtomic, .Stall, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, - .ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .SelFlush, + .ClearDirty, .SetDirty, .SetValid, 
.ClearValid, .ZeroCacheLine, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CMOp, .CacheEn, .LRUWriteEn); diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 8f0e7aa2d..124b92678 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -60,6 +60,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic SetDirty, // Set the dirty bit in the selected way and set output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic ZeroCacheLine, // Write zeros to all bytes of cacheline + output logic CMOZeroHit, // CMOZ hit output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for both data and tag output logic LRUWriteEn, // Update the LRU state @@ -75,7 +76,10 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic AnyUpdateHit, AnyHit; logic AnyMiss; logic FlushFlag; - + logic CMOWritebackHit; + logic CMOZeroNoEviction; + logic CMOZeroEviction; + typedef enum logic [3:0]{STATE_READY, // hit states // miss states STATE_FETCH, @@ -93,8 +97,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P, statetype CurrState, NextState; assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 + assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit + assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; + assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now + assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with 
writeback dirty line + assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. @@ -117,8 +125,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement - else if(AnyMiss) /* & LineDirty */ NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else if((CMOp[1] | CMOp[2]) & CacheHit) NextState = STATE_CMO_WRITEBACK; + else if(AnyMiss | CMOZeroEviction) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement + else if(CMOWritebackHit) NextState = STATE_CMO_WRITEBACK; else NextState = STATE_READY; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else if(CacheBusAck) NextState = STATE_READY; @@ -127,7 +135,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; else NextState = STATE_READY; // exclusion-tag-start: icache case - STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH; + STATE_WRITEBACK: if(CacheBusAck & ~CMOp[3]) NextState = STATE_FETCH; + else if(CacheBusAck) NextState = STATE_CMO_DONE; else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. 
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; @@ -139,6 +148,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_CMO_WRITEBACK: if(CacheBusAck & (CMOp[1] | CMOp[2])) NextState = STATE_CMO_DONE; else NextState = STATE_CMO_WRITEBACK; + STATE_CMO_DONE: if(Stall) NextState = STATE_CMO_DONE; + else NextState = STATE_READY; // exclusion-tag-end: icache case default: NextState = STATE_READY; endcase @@ -146,7 +157,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // com back to CPU assign CacheCommitted = (CurrState != STATE_READY) & ~(READ_ONLY_CACHE & (CurrState == STATE_READ_HOLD | CurrState == STATE_CMO_DONE)); - assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | ((CMOp[1] | CMOp[2]) & CacheHit))) | // exclusion-tag: icache StallStates + assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | CMOWritebackHit | CMOZeroEviction)) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
@@ -154,21 +165,26 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK) | (CurrState == STATE_CMO_WRITEBACK); // write enables internal to cache + assign CMOZeroHit = CurrState == STATE_READY & CMOp[3] & CacheHit ; assign SetValid = CurrState == STATE_WRITE_LINE | - (CurrState == STATE_READY & CMOp[3]); // *** RT: NOT completely right has to be a hit + (CurrState == STATE_READY & CMOZeroNoEviction) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0] & CacheHit) | (CurrState == STATE_CMO_WRITEBACK & CMOp[2] & CacheBusAck)); // coverage off -item e 1 -fecexprrow 8 - assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | + assign LRUWriteEn = (CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck) | (CurrState == STATE_WRITE_LINE) & ~FlushStage; // exclusion-tag-start: icache flushdirtycontrols - assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOp[3])) | // exclusion-tag: icache SetDirty *** NOT completely right has to be a hit for CMOp[3] - (CurrState == STATE_WRITE_LINE & (CacheRW[0])); + assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty + (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. 
// Flush and eviction controls (P.ZICBOM_SUPPORTED & CurrState == STATE_CMO_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck); - assign ZeroCacheLine = CurrState == STATE_READY & CMOp[3]; // *** RT: NOT completely right + assign ZeroCacheLine = P.ZICBOZ_SUPPORTED & ((CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck))); assign SelWriteback = (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_READY & AnyMiss & LineDirty); assign SelCMOWriteback = CurrState == STATE_CMO_WRITEBACK; @@ -188,7 +204,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // Bus interface controls assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses (CurrState == STATE_FETCH & ~CacheBusAck) | - (CurrState == STATE_WRITEBACK & CacheBusAck); + (CurrState == STATE_WRITEBACK & CacheBusAck & ~CMOp[3]); assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 85d2b36ab..216cd82d2 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -41,10 +41,10 @@ module cacheway import cvw::*; #(parameter cvw_t P, input logic SetValid, // Set the valid bit in the selected way and set input logic ClearValid, // Clear the valid bit in the selected way and set input logic SetDirty, // Set the dirty bit in the selected way and set - input logic ZeroCacheLine, // Write zeros to all bytes of a cache line + input logic CMOZeroHit, // Write zeros to all bytes of a cache line input logic ClearDirty, // Clear the dirty bit in the selected way and set input logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback - input logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for 
both data and tag + input logic SelCMOWriteback,// Overrides cached tag check to select a specific way and set for writeback for both data and tag input logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr input logic VictimWay, // LRU selected this way as victim to evict input logic FlushWay, // This way is selected for flush and possible writeback if dirty @@ -81,7 +81,9 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic SelNotHit2; if (P.ZICBOZ_SUPPORTED) begin : cbologic - assign SelNotHit2 = SetValid & ~(ZeroCacheLine & HitWay); + assign SelNotHit2 = SetValid & ~CMOZeroHit; + //assign SelNotHit2 = SetValid; + end else begin : cbologic assign SelNotHit2 = SetValid; end @@ -96,7 +98,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, // nonzero ways will never see SelFlush=0 while FlushWay=1 since FlushWay only advances on a subset of SelFlush assertion cases. assign FlushWayEn = FlushWay & SelFlush; // *** RT: This is slopy. I should refactor to have the fsm issue two types of writeback commands - assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; + assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; // *** this is not correct + //assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; end else begin:flushlogic // no flush operation for read-only caches. 
assign SelTag = VictimWay; diff --git a/src/cvw.sv b/src/cvw.sv index 818773087..0c3e959fc 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -60,6 +60,7 @@ typedef struct packed { logic ZICBOZ_SUPPORTED; logic ZICBOP_SUPPORTED; logic SVPBMT_SUPPORTED; + logic SVNAPOT_SUPPORTED; logic SVINVAL_SUPPORTED; // Microarchitectural Features diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index ecb1e6076..e923f1e3f 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -375,12 +375,14 @@ module controller import cvw::*; #(parameter cvw_t P) ( LSUPrefetchD = 1'b0; ImmSrcD = PreImmSrcD; if (P.ZICBOP_SUPPORTED & (InstrD[14:0] == 15'b110_00000_0010011)) begin // ori with destiation x0 is hint for Prefetch - case (Rs2D) // which type of prefectch? Note: prefetch.r and .w are handled the same in Wally + /* verilator lint_off CASEINCOMPLETE */ + case (Rs2D) // which type of prefectch? Note: prefetch.r and .w are handled the same in Wally 5'b00000: IFUPrefetchD = 1'b1; // prefetch.i 5'b00001: LSUPrefetchD = 1'b1; // prefetch.r 5'b00011: LSUPrefetchD = 1'b1; // prefetch.w // default: not a prefetch hint endcase + /* verilator lint_on CASEINCOMPLETE */ if (IFUPrefetchD | LSUPrefetchD) ImmSrcD = 3'b001; // use S-type immediate format for prefetches end end diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 2bceb6175..320b9e330 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -85,6 +85,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( input logic STATUS_SUM, // Status CSR: Supervisor access to user memory input logic STATUS_MPRV, // Status CSR: modify machine privilege input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level + input logic ENVCFG_PBMTE, // Page-based memory types enabled input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk output logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits @@ -170,7 +171,7 @@ 
module ifu import cvw::*; #(parameter cvw_t P) ( assign TLBFlush = sfencevmaM & ~StallMQ; mmu #(.P(P), .TLB_ENTRIES(P.ITLB_ENTRIES), .IMMU(1)) - immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .PrivilegeModeW, .DisableTranslation(1'b0), .VAdr(PCFExt), .Size(2'b10), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index aeadec262..e120d454b 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -80,6 +80,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege input logic [1:0] STATUS_MPP, // Machine previous privilege mode + input logic ENVCFG_PBMTE, // Page-based memory types enabled input logic [P.XLEN-1:0] PCSpillF, // Fetch PC input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits @@ -189,7 +190,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DisableTranslation = SelHPTW | FlushDCacheM; assign WriteAccessM = PreLSURWM[0] | (|CMOpM); mmu #(.P(P), .TLB_ENTRIES(P.DTLB_ENTRIES), .IMMU(0)) - dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(sfencevmaM), .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM), diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index f05073fcf..e8e06fde0 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -34,6 +34,7 @@ module mmu import cvw::*; 
#(parameter cvw_t P, input logic STATUS_SUM, // Status CSR: Supervisor access to user memory input logic STATUS_MPRV, // Status CSR: modify machine privilege input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level + input logic ENVCFG_PBMTE, // Page-based memory types enabled input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic DisableTranslation, // virtual address translation disabled during D$ flush and HPTW walk that use physical addresses input logic [P.XLEN+1:0] VAdr, // virtual/physical address from IEU or physical address from HPTW @@ -70,6 +71,7 @@ module mmu import cvw::*; #(parameter cvw_t P, logic TLBHit; // Hit in TLB logic TLBPageFault; // Page fault from TLB logic ReadNoAmoAccessM; // Read that is not part of atomic operation causes Load faults. Otherwise StoreAmo faults + logic [1:0] PBMemoryType; // PBMT field of PTE during TLB hit, or 00 otherwise // only instantiate TLB if Virtual Memory is supported if (P.VIRTMEM_SUPPORTED) begin:tlb @@ -80,16 +82,17 @@ module mmu import cvw::*; #(parameter cvw_t P, .clk, .reset, .SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .SATP_ASID(SATP_REGW[P.ASID_BASE+P.ASID_BITS-1:P.ASID_BASE]), - .VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, - .Translate, .TLBPageFault, .UpdateDA); + .Translate, .TLBPageFault, .UpdateDA, .PBMemoryType); end else begin:tlb // just pass address through as physical assign Translate = 0; assign TLBMiss = 0; assign TLBHit = 1; // *** is this necessary assign TLBPageFault = 0; + assign PBMemoryType = 2'b00; end // If translation is occuring, select translated physical address from TLB @@ -103,8 +106,8 @@ module mmu import cvw::*; #(parameter cvw_t P, 
/////////////////////////////////////////// pmachecker #(P) pmachecker(.PhysicalAddress, .Size, - .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, - .Cacheable, .Idempotent, .SelTIM, + .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PBMemoryType, + .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); if (P.PMP_ENTRIES > 0) begin : pmp diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 41d8f9b74..ce129af51 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -35,6 +35,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( input logic ExecuteAccessF, // Execute access input logic WriteAccessM, // Write access input logic ReadAccessM, // Read access + input logic [1:0] PBMemoryType, // PBMT field of PTE during TLB hit, or 00 otherwise output logic Cacheable, Idempotent, SelTIM, output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, @@ -45,6 +46,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( logic AccessRW, AccessRWX, AccessRX; logic [10:0] SelRegions; logic AtomicAllowed; + logic CacheableRegion, IdempotentRegion; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; @@ -54,11 +56,15 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Determine which region of physical memory (if any) is being accessed adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions); - // Only non-core RAM/ROM memory regions are cacheable - assign Cacheable = SelRegions[8] | SelRegions[7] | SelRegions[6]; // exclusion-tag: unused-cachable + // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cacheability; NC and IO are uncacheable + assign CacheableRegion = SelRegions[8] | SelRegions[7] | SelRegions[6]; + assign Cacheable = (PBMemoryType == 2'b00) ? 
CacheableRegion : 0; // exclusion-tag: unused-cachable + // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly - // I/O is nonidempotent. - assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6]; // exclusion-tag: unused-idempotent + // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent + assign IdempotentRegion = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[7] | SelRegions[6]; + assign Idempotent = (PBMemoryType == 2'b00) ? IdempotentRegion : (PBMemoryType == 2'b01); // exclusion-tag: unused-idempotent + // Atomic operations are only allowed on RAM assign AtomicAllowed = SelRegions[10] | SelRegions[8] | SelRegions[6]; // exclusion-tag: unused-atomic // Check if tightly integrated memories are selected diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv index c081b0925..497a97fe5 100644 --- a/src/mmu/tlb/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -57,12 +57,13 @@ module tlb import cvw::*; #(parameter cvw_t P, input logic [P.ASID_BITS-1:0] SATP_ASID, input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, input logic [1:0] STATUS_MPP, + input logic ENVCFG_PBMTE, // Page-based memory types enabled input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic ReadAccess, input logic WriteAccess, input logic DisableTranslation, input logic [P.XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) - input logic [P.XLEN-1:0] PTE, + input logic [P.XLEN-1:0] PTE, // page table entry to write input logic [1:0] PageTypeWriteVal, input logic TLBWrite, input logic TLBFlush, @@ -71,20 +72,23 @@ module tlb import cvw::*; #(parameter cvw_t P, output logic TLBHit, output logic Translate, output logic TLBPageFault, - output logic UpdateDA + output logic UpdateDA, + output logic [1:0] PBMemoryType // PBMT field of PTE during TLB hit, or 00 otherwise ); - logic [TLB_ENTRIES-1:0] Matches, 
WriteEnables, PTE_Gs; // used as the one-hot encoding of WriteIndex + logic [TLB_ENTRIES-1:0] Matches, WriteEnables, PTE_Gs, PTE_NAPOTs; // used as the one-hot encoding of WriteIndex // Sections of the virtual and physical addresses logic [P.VPN_BITS-1:0] VPN; logic [P.PPN_BITS-1:0] PPN; // Sections of the page table entry - logic [7:0] PTEAccessBits; + logic [10:0] PTEAccessBits; logic [1:0] HitPageType; logic CAMHit; logic SV39Mode; logic Misaligned; + logic BadPTEWrite; // trying to write malformed PTE logic MegapageMisaligned; + logic PTE_N; // NAPOT page table entry if(P.XLEN == 32) begin assign MegapageMisaligned = |(PPN[9:0]); // must have zero PPN0 @@ -101,20 +105,28 @@ module tlb import cvw::*; #(parameter cvw_t P, assign VPN = VAdr[P.VPN_BITS+11:12]; - tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + // check if reserved, N, or PBMT bits are malformed when PTE is written in RV64 + assign BadPTEWrite = (P.XLEN == 64) & TLBWrite & ( + PTE[P.XLEN-1] & ~P.SVNAPOT_SUPPORTED | // N must be 0 if SVNAPOT is not supported + PTE[P.XLEN-2:P.XLEN-3] != 0 & ~P.SVPBMT_SUPPORTED | // PBMT must be 0 if SVPBMT is not supported + PTE[P.XLEN-2:P.XLEN-3] == 3 | // PBMT of 3 is reserved and never legal + PTE[P.XLEN-4:P.XLEN-10] != 0 ); // Reserved bits must be 0 + + tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, - .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, - .UpdateDA, .SV39Mode, .Translate); + .PTEAccessBits, .CAMHit, .Misaligned, .BadPTEWrite, + .TLBMiss, .TLBHit, .TLBPageFault, + .UpdateDA, .SV39Mode, .Translate, .PTE_N, .PBMemoryType); tlblru #(TLB_ENTRIES) lru(.clk, .reset, .TLBWrite, .TLBFlush, .Matches, .CAMHit, .WriteEnables); tlbcam #(P, TLB_ENTRIES, P.VPN_BITS + P.ASID_BITS, P.VPN_SEGMENT_BITS) - tlbcam(.clk, .reset, .VPN, 
.PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, + tlbcam(.clk, .reset, .VPN, .PageTypeWriteVal, .SV39Mode, .TLBFlush, .WriteEnables, .PTE_Gs, .PTE_NAPOTs, .SATP_ASID, .Matches, .HitPageType, .CAMHit); - tlbram #(P, TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs); + tlbram #(P, TLB_ENTRIES) tlbram(.clk, .reset, .PTE, .Matches, .WriteEnables, .PPN, .PTEAccessBits, .PTE_Gs, .PTE_NAPOTs); // Replace segments of the virtual page number with segments of the physical // page number. For 4 KB pages, the entire virtual page number is replaced. // For superpages, some segments are considered offsets into a larger page. - tlbmixer #(P) Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .TLBPAdr); + tlbmixer #(P) Mixer(.VPN, .PPN, .HitPageType, .Offset(VAdr[11:0]), .TLBHit, .PTE_N, .TLBPAdr); endmodule diff --git a/src/mmu/tlb/tlbcam.sv b/src/mmu/tlb/tlbcam.sv index 7a4100829..e591498c4 100644 --- a/src/mmu/tlb/tlbcam.sv +++ b/src/mmu/tlb/tlbcam.sv @@ -38,7 +38,8 @@ module tlbcam import cvw::*; #(parameter cvw_t P, input logic TLBFlush, input logic [TLB_ENTRIES-1:0] WriteEnables, input logic [TLB_ENTRIES-1:0] PTE_Gs, - input logic [P.ASID_BITS-1:0] SATP_ASID, + input logic [TLB_ENTRIES-1:0] PTE_NAPOTs, // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000) + input logic [P.ASID_BITS-1:0] SATP_ASID, output logic [TLB_ENTRIES-1:0] Matches, output logic [1:0] HitPageType, output logic CAMHit @@ -53,7 +54,7 @@ module tlbcam import cvw::*; #(parameter cvw_t P, // page number segments. 
tlbcamline #(P, KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0]( - .clk, .reset, .VPN, .SATP_ASID, .SV39Mode, .PTE_G(PTE_Gs), .PageTypeWriteVal, .TLBFlush, + .clk, .reset, .VPN, .SATP_ASID, .SV39Mode, .PTE_G(PTE_Gs), .PTE_NAPOT(PTE_NAPOTs), .PageTypeWriteVal, .TLBFlush, .WriteEnable(WriteEnables), .PageTypeRead, .Match(Matches)); assign CAMHit = |Matches & ~TLBFlush; or_rows #(TLB_ENTRIES,2) PageTypeOr(PageTypeRead, HitPageType); diff --git a/src/mmu/tlb/tlbcamline.sv b/src/mmu/tlb/tlbcamline.sv index 55023006f..9471fb3d9 100644 --- a/src/mmu/tlb/tlbcamline.sv +++ b/src/mmu/tlb/tlbcamline.sv @@ -37,6 +37,7 @@ module tlbcamline import cvw::*; #(parameter cvw_t P, input logic SV39Mode, input logic WriteEnable, // Write a new entry to this line input logic PTE_G, + input logic PTE_NAPOT, // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000) input logic [1:0] PageTypeWriteVal, input logic TLBFlush, // Flush this line (set valid to 0) output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? @@ -76,7 +77,7 @@ module tlbcamline import cvw::*; #(parameter cvw_t P, end else begin: match logic [SEGMENT_BITS-1:0] Key2, Key3, Query2, Query3; - logic Match2, Match3; + logic Match2, Match3, MatchNAPOT; assign {Query3, Query2, Query1, Query0} = VPN; assign {Key_ASID, Key3, Key2, Key1, Key0} = Key; @@ -84,7 +85,9 @@ module tlbcamline import cvw::*; #(parameter cvw_t P, // Calculate the actual match value based on the input vpn and the page type. // For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1] // should automatically match. 
- assign Match0 = (Query0 == Key0) | (PageType > 2'd0); // least signifcant section + // In Svnapot, if N bit is set and bottom 4 bits of PPN = 1000, then these bits don't need to match + assign MatchNAPOT = P.SVNAPOT_SUPPORTED & PTE_NAPOT & (Query0[SEGMENT_BITS-1:4] == Key0[SEGMENT_BITS-1:4]); + assign Match0 = (Query0 == Key0) | (PageType > 2'd0) | MatchNAPOT; // least significant section assign Match1 = (Query1 == Key1) | (PageType > 2'd1); assign Match2 = (Query2 == Key2) | (PageType > 2'd2); assign Match3 = (Query3 == Key3) | SV39Mode; // this should always match in sv39 because they aren't used diff --git a/src/mmu/tlb/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv index 67d598038..bfd8a9251 100644 --- a/src/mmu/tlb/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -29,30 +29,37 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( input logic [P.SVMODE_BITS-1:0] SATP_MODE, input logic [P.XLEN-1:0] VAdr, - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor - input logic ReadAccess, WriteAccess, - input logic DisableTranslation, - input logic TLBFlush, // Invalidate all TLB entries - input logic [7:0] PTEAccessBits, - input logic CAMHit, - input logic Misaligned, - output logic TLBMiss, - output logic TLBHit, - output logic TLBPageFault, - output logic UpdateDA, - output logic SV39Mode, - output logic Translate + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, + input logic ENVCFG_PBMTE, // Page-based memory types enabled + input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor + input logic ReadAccess, WriteAccess, + input logic DisableTranslation, + input logic TLBFlush, // Invalidate all TLB entries + input logic [10:0] PTEAccessBits, + input logic CAMHit, + input logic Misaligned, + input logic BadPTEWrite, // trying to write malformed PTE + output logic TLBMiss, + output logic 
TLBHit, + output logic TLBPageFault, + output logic UpdateDA, + output logic SV39Mode, + output logic Translate, + output logic PTE_N, // NAPOT page table entry + output logic [1:0] PBMemoryType // PBMT field of PTE during TLB hit, or 00 otherwise ); // Sections of the page table entry logic [1:0] EffectivePrivilegeMode; logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R, PTE_V; // Useful PTE Control Bits + logic [1:0] PTE_PBMT; logic UpperBitsUnequal; logic TLBAccess; logic ImproperPrivilege; + logic BadPBMT; + logic CausePageFault; // Grab the sv mode from SATP and determine whether translation should occur assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 @@ -65,8 +72,16 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( vm64check #(P) vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); // unswizzle useful PTE bits + assign PTE_N = PTEAccessBits[10]; + assign PTE_PBMT = PTEAccessBits[9:8]; assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; assign {PTE_U, PTE_X, PTE_W, PTE_R, PTE_V} = PTEAccessBits[4:0]; + + // Page fault if PBMT is nonzero when SVPBMT is not supported and enabled + assign BadPBMT = PTE_PBMT != 0 & ~(P.SVPBMT_SUPPORTED & ENVCFG_PBMTE); + + // Send PMA a 2-bit MemoryType that is PBMT during leaf page table accesses and 0 otherwise + assign PBMemoryType = PTE_PBMT & {2{Translate & TLBHit & P.SVPBMT_SUPPORTED}}; // Check whether the access is allowed, page faulting if not. if (ITLB == 1) begin:itlb // Instruction TLB fault checking @@ -74,14 +89,11 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( // only execute non-user mode pages. 
assign ImproperPrivilege = ((EffectivePrivilegeMode == P.U_MODE) & ~PTE_U) | ((EffectivePrivilegeMode == P.S_MODE) & PTE_U); - if(P.SVADU_SUPPORTED) begin : hptwwrites - assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault; - assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpperBitsUnequal | Misaligned | ~PTE_V); - end else begin - // fault for software handling if access bit is off - assign UpdateDA = ~PTE_A; - assign TLBPageFault = Translate & TLBHit & (ImproperPrivilege | ~PTE_X | UpdateDA | UpperBitsUnequal | Misaligned | ~PTE_V); - end + assign CausePageFault = ImproperPrivilege | ~PTE_X | UpperBitsUnequal | BadPTEWrite | BadPBMT | Misaligned | ~PTE_V | (~PTE_A & ~P.SVADU_SUPPORTED); // without Svadu, a clear A bit faults for software handling + assign TLBPageFault = Translate & TLBHit & CausePageFault; + // Determine whether to update DA bits: with Svadu, only on a non-faulting hit with A clear + if(P.SVADU_SUPPORTED) assign UpdateDA = Translate & TLBHit & ~PTE_A & ~TLBPageFault; + else assign UpdateDA = ~PTE_A; end else begin:dtlb // Data TLB fault checking logic InvalidRead, InvalidWrite; diff --git a/src/mmu/tlb/tlbmixer.sv b/src/mmu/tlb/tlbmixer.sv index 07d6eb985..9652e21ef 100644 --- a/src/mmu/tlb/tlbmixer.sv +++ b/src/mmu/tlb/tlbmixer.sv @@ -30,18 +30,19 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module tlbmixer import cvw::*; #(parameter cvw_t P) ( - input logic [P.VPN_BITS-1:0] VPN, - input logic [P.PPN_BITS-1:0] PPN, - input logic [1:0] HitPageType, - input logic [11:0] Offset, - input logic TLBHit, - output logic [P.PA_BITS-1:0] TLBPAdr + input logic [P.VPN_BITS-1:0] VPN, + input logic [P.PPN_BITS-1:0] PPN, + input logic [1:0] HitPageType, + input logic [11:0] Offset, + input logic TLBHit, + input logic PTE_N, // NAPOT page table entry + output logic [P.PA_BITS-1:0] TLBPAdr ); localparam EXTRA_BITS = P.PPN_BITS - P.VPN_BITS; logic [P.PPN_BITS-1:0] ZeroExtendedVPN; logic [P.PPN_BITS-1:0] PageNumberMask; - logic [P.PPN_BITS-1:0] PPNMixed; + logic [P.PPN_BITS-1:0] PPNMixed,
PPNMixed2; // produce PageNumberMask with 1s where virtual page number bits should be untranslaetd for superpages if (P.XLEN == 32) @@ -57,11 +58,45 @@ module tlbmixer import cvw::*; #(parameter cvw_t P) ( // merge low segments of VPN with high segments of PPN decided by the pagetype. assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. - assign PPNMixed = PPN | ZeroExtendedVPN & PageNumberMask; // - //mux2 #(1) mixmux[P.PPN_BITS-1:0](ZeroExtendedVPN, PPN, PageNumberMask, PPNMixed); - //assign PPNMixed = (ZeroExtendedVPN & ~PageNumberMask) | (PPN & PageNumberMask); + assign PPNMixed = PPN | ZeroExtendedVPN & PageNumberMask; // low bits of PPN are already zero + + // In Svnapot, when N=1, use bottom bits of VPN for contiguous translations + if (P.SVNAPOT_SUPPORTED) begin + // 64 KiB contiguous NAPOT translations supported + logic [3:0] PPNMixedBot; + mux2 #(4) napotmux(PPNMixed[3:0], VPN[3:0], PTE_N, PPNMixedBot); + assign PPNMixed2 = {PPNMixed[P.PPN_BITS-1:4], PPNMixedBot}; + + /* // Generalized NAPOT implementation supporting various sized contiguous regions + // This would also require a priority encoder in the tlbcam + // Not yet tested + logic [8:0] NAPOTMask, NAPOTPN, PPNMixedBot; + always_comb begin + casez(PPN[8:0]) + 9'b100000000: NAPOTMask = 9'b111111111; + 9'b?10000000: NAPOTMask = 9'b011111111; + 9'b??1000000: NAPOTMask = 9'b001111111; + 9'b???100000: NAPOTMask = 9'b000111111; + 9'b????10000: NAPOTMask = 9'b000011111; + 9'b?????1000: NAPOTMask = 9'b000001111; + 9'b??????100: NAPOTMask = 9'b000000111; + 9'b???????10: NAPOTMask = 9'b000000011; + 9'b????????1: NAPOTMask = 9'b000000001; + default: NAPOTMask = 9'b000000000; + endcase + end + // check malformed NAPOT PPN, which should cause page fault + // Replace PPN with VPN in lower bits of page number based on mask + assign NAPOTPN = VPN & NAPOTMask | PPN & ~NAPOTMask; + mux2 #(9) napotmux(PPNMixed[8:0], NAPOTPN, PTE_N, PPNMixedBot); + assign PPNMixed2 =
{PPNMixed[P.PPN_BITS-1:9], PPNMixedBot}; */ + + end else begin // no Svnapot + assign PPNMixed2 = PPNMixed; + end + // Output the hit physical address if translation is currently on. // Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal - mux2 #(P.PA_BITS) hitmux('0, {PPNMixed, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system + mux2 #(P.PA_BITS) hitmux('0, {PPNMixed2, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segmentation error if this miss somehow passes through system endmodule diff --git a/src/mmu/tlb/tlbram.sv b/src/mmu/tlb/tlbram.sv index eb8dedca7..07d9dd87c 100644 --- a/src/mmu/tlb/tlbram.sv +++ b/src/mmu/tlb/tlbram.sv @@ -32,23 +32,24 @@ module tlbram import cvw::*; #(parameter cvw_t P, parameter TLB_ENTRIES = 8) ( input logic clk, reset, - input logic [P.XLEN-1:0] PTE, + input logic [P.XLEN-1:0] PTE, input logic [TLB_ENTRIES-1:0] Matches, WriteEnables, - output logic [P.PPN_BITS-1:0] PPN, - output logic [7:0] PTEAccessBits, - output logic [TLB_ENTRIES-1:0] PTE_Gs + output logic [P.PPN_BITS-1:0] PPN, + output logic [10:0] PTEAccessBits, + output logic [TLB_ENTRIES-1:0] PTE_Gs, + output logic [TLB_ENTRIES-1:0] PTE_NAPOTs // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000) ); - logic [P.PPN_BITS+9:0] RamRead[TLB_ENTRIES-1:0]; - logic [P.PPN_BITS+9:0] PageTableEntry; + logic [P.XLEN-1:0] RamRead[TLB_ENTRIES-1:0]; // stores the page table entries + logic [P.XLEN-1:0] PageTableEntry; // RAM implemented with array of flops and AND/OR read logic - tlbramline #(P.PPN_BITS+10) tlbramline[TLB_ENTRIES-1:0] + tlbramline #(P) tlbramline[TLB_ENTRIES-1:0] (.clk, .reset, .re(Matches), .we(WriteEnables), - .d(PTE[P.PPN_BITS+9:0]), .q(RamRead), .PTE_G(PTE_Gs)); - or_rows #(TLB_ENTRIES, P.PPN_BITS+10) PTEOr(RamRead, PageTableEntry); + .d(PTE), .q(RamRead), .PTE_G(PTE_Gs), .PTE_NAPOT(PTE_NAPOTs)); +
or_rows #(TLB_ENTRIES, P.XLEN) PTEOr(RamRead, PageTableEntry); // Rename the bits read from the TLB RAM - assign PTEAccessBits = PageTableEntry[7:0]; + assign PTEAccessBits = {PageTableEntry[P.XLEN-1:P.XLEN-3] & {3{P.XLEN == 64}}, PageTableEntry[7:0]}; // include N and PBMT bits assign PPN = PageTableEntry[P.PPN_BITS+9:10]; endmodule diff --git a/src/mmu/tlb/tlbramline.sv b/src/mmu/tlb/tlbramline.sv index cc393f72a..971e804ac 100644 --- a/src/mmu/tlb/tlbramline.sv +++ b/src/mmu/tlb/tlbramline.sv @@ -26,16 +26,26 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module tlbramline #(parameter WIDTH = 22) - (input logic clk, reset, - input logic re, we, - input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - output logic PTE_G); +module tlbramline import cvw::*; #(parameter cvw_t P) + (input logic clk, reset, + input logic re, we, + input logic [P.XLEN-1:0] d, + output logic [P.XLEN-1:0] q, + output logic PTE_G, + output logic PTE_NAPOT // entry is in NAPOT mode (N bit set and PPN[3:0] = 1000) +); - logic [WIDTH-1:0] line; + logic [P.XLEN-1:0] line; + + if (P.XLEN == 64) begin // save 7 reserved bits + // could optimize out N and PBMT from d[63:61] if they aren't supported + logic [56:0] ptereg; + flopenr #(57) pteflop(clk, reset, we, {d[63:61], d[53:0]}, ptereg); + assign line = {ptereg[56:54], 7'b0, ptereg[53:0]}; + end else // rv32 + flopenr #(P.XLEN) pteflop(clk, reset, we, d, line); - flopenr #(WIDTH) pteflop(clk, reset, we, d, line); assign q = re ? 
line : 0; assign PTE_G = line[5]; // send global bit to CAM as part of ASID matching + assign PTE_NAPOT = P.SVNAPOT_SUPPORTED & line[P.XLEN-1] & (line[13:10] == 4'b1000); // send NAPOT bit to CAM as part of matching lsbs of VPN endmodule diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index ccca40a00..f99ee28b1 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -85,6 +85,7 @@ module csr import cvw::*; #(parameter cvw_t P) ( output var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW, output logic [3:0] ENVCFG_CBE, + output logic ENVCFG_PBMTE, // Page-based memory type enable // output logic [P.XLEN-1:0] CSRReadValW, // value read from CSR output logic [P.XLEN-1:0] UnalignedPCNextF, // Next PC, accounting for traps and returns @@ -127,7 +128,6 @@ module csr import cvw::*; #(parameter cvw_t P) ( logic [63:0] MENVCFG_REGW; logic [P.XLEN-1:0] SENVCFG_REGW; logic ENVCFG_STCE; // supervisor timer counter enable - logic ENVCFG_PBMTE; // page-based memory types enable logic ENVCFG_FIOM; // fence implies io (presently not used) // only valid unflushed instructions can access CSRs diff --git a/src/privileged/csri.sv b/src/privileged/csri.sv index 2e5488af7..ea7bf7afb 100644 --- a/src/privileged/csri.sv +++ b/src/privileged/csri.sv @@ -60,7 +60,7 @@ module csri import cvw::*; #(parameter cvw_t P) ( // SSIP is writable in SIP if S mode exists if (P.S_SUPPORTED) begin:mask if (P.SSTC_SUPPORTED) begin - assign MIP_WRITE_MASK = 12'h202; // SEIP and SSIP are writable, but STIP is not writable when STIMECMP is implemented (see SSTC spec) + assign MIP_WRITE_MASK = ENVCFG_STCE ? 12'h202 : 12'h222; // SEIP and SSIP are writable, but STIP is not writable when STIMECMP is implemented (see SSTC spec) assign STIP = ENVCFG_STCE ? 
STimerInt : MIP_REGW_writeable[5]; end else begin assign MIP_WRITE_MASK = 12'h222; // SEIP, STIP, SSIP are writeable in MIP (20210108-draft 3.1.9) diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index 95599e3a0..fa329f363 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -90,7 +90,7 @@ module csrs import cvw::*; #(parameter cvw_t P) ( assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)); if(P.XLEN == 64) begin logic LegalSatpModeM; - assign LegalSatpModeM = P.VIRTMEM_SUPPORTED & (CSRWriteValM[63:60] == 0 | CSRWriteValM[63:60] == 8 | CSRWriteValM[63:60] == 9); // supports SV39 and 48 + assign LegalSatpModeM = P.VIRTMEM_SUPPORTED & (CSRWriteValM[63:60] == 0 | CSRWriteValM[63:60] == P.SV39 | CSRWriteValM[63:60] == P.SV48); // supports SV39 and 48 assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM) & LegalSatpModeM; end else // RV32 assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM) & P.VIRTMEM_SUPPORTED; diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index 46c69f17d..619ed2b32 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -83,6 +83,7 @@ module privileged import cvw::*; #(parameter cvw_t P) ( output var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], // PMP address entries to MMU output logic [2:0] FRM_REGW, // FPU rounding mode output logic [3:0] ENVCFG_CBE, // Cache block operation enables + output logic ENVCFG_PBMTE, // Page-based memory type enable // PC logic output in privileged unit output logic [P.XLEN-1:0] UnalignedPCNextF, // Next PC from trap/return PC logic // control outputs @@ -137,7 +138,7 @@ module privileged import cvw::*; #(parameter cvw_t P) ( .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS, .MEDELEG_REGW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .SATP_REGW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .SetFflagsM, 
.FRM_REGW, .ENVCFG_CBE, + .SetFflagsM, .FRM_REGW, .ENVCFG_CBE, .ENVCFG_PBMTE, .CSRReadValW,.UnalignedPCNextF, .IllegalCSRAccessM, .BigEndianM); // pipeline early-arriving trap sources diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 793ea5777..5fbc89a26 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -78,6 +78,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD; logic SquashSCW; logic MDUActiveE; // Mul/Div instruction being executed + logic ENVCFG_PBMTE; // Page-based memory type enable logic [3:0] ENVCFG_CBE; // Cache Block operation enables logic [3:0] CMOpM; // 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero logic IFUPrefetchE, LSUPrefetchM; // instruction / data prefetch hints @@ -184,7 +185,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .IllegalBaseInstrD, .IllegalFPUInstrD, .InstrPageFaultF, .IllegalIEUFPUInstrD, .InstrMisalignedFaultM, // mmu management .PrivilegeModeW, .PTE, .PageType, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, - .STATUS_MPP, .ITLBWriteF, .sfencevmaM, .ITLBMissF, + .STATUS_MPP, .ENVCFG_PBMTE, .ITLBWriteF, .sfencevmaM, .ITLBMissF, // pmp/pma (inside mmu) signals. 
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .InstrAccessFaultF, .InstrUpdateDAF); @@ -231,7 +232,8 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .STATUS_MXR, // from csr .STATUS_SUM, // from csr .STATUS_MPRV, // from csr - .STATUS_MPP, // from csr + .STATUS_MPP, // from csr + .ENVCFG_PBMTE, // from csr .sfencevmaM, // connects to privilege .DCacheStallM, // connects to privilege .LoadPageFaultM, // connects to privilege @@ -294,7 +296,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .PrivilegeModeW, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .STATUS_FS, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .FRM_REGW, .ENVCFG_CBE, .BreakpointFaultM, .EcallFaultM, .wfiM, .IntPendingM, .BigEndianM); + .FRM_REGW, .ENVCFG_CBE, .ENVCFG_PBMTE, .BreakpointFaultM, .EcallFaultM, .wfiM, .IntPendingM, .BigEndianM); end else begin assign CSRReadValW = 0; assign UnalignedPCNextF = PC2NextF; diff --git a/testbench/common/riscvassertions.sv b/testbench/common/riscvassertions.sv index d1007ec41..a6cee910e 100644 --- a/testbench/common/riscvassertions.sv +++ b/testbench/common/riscvassertions.sv @@ -58,9 +58,11 @@ module riscvassertions import cvw::*; #(parameter cvw_t P); assert ((P.ZMMUL_SUPPORTED == 0) || (P.M_SUPPORTED ==0)) else $error("At most one of ZMMUL_SUPPORTED and M_SUPPORTED can be enabled"); assert ((P.ZICNTR_SUPPORTED == 0) || (P.ZICSR_SUPPORTED == 1)) else $error("ZICNTR_SUPPORTED requires ZICSR_SUPPORTED"); assert ((P.ZIHPM_SUPPORTED == 0) || (P.ZICNTR_SUPPORTED == 1)) else $error("ZIPHM_SUPPORTED requires ZICNTR_SUPPORTED"); - assert ((P.ZICBOM_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOM required DCACHE_SUPPORTED"); - assert ((P.ZICBOZ_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOZ required DCACHE_SUPPORTED"); - assert ((P.ZICBOP_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOP required DCACHE_SUPPORTED"); + assert ((P.ZICBOM_SUPPORTED == 0) || 
(P.DCACHE_SUPPORTED == 1)) else $error("ZICBOM requires DCACHE_SUPPORTED"); + assert ((P.ZICBOZ_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOZ requires DCACHE_SUPPORTED"); + assert ((P.ZICBOP_SUPPORTED == 0) || (P.DCACHE_SUPPORTED == 1)) else $error("ZICBOP requires DCACHE_SUPPORTED"); + assert ((P.SVPBMT_SUPPORTED == 0) || (P.VIRTMEM_SUPPORTED == 1 && P.XLEN==64)) else $error("SVPBMT requires VIRTMEM_SUPPORTED and RV64"); + assert ((P.SVNAPOT_SUPPORTED == 0) || (P.VIRTMEM_SUPPORTED == 1 && P.XLEN==64)) else $error("SVNAPOT requires VIRTMEM_SUPPORTED and RV64"); end endmodule diff --git a/testbench/tests.vh b/testbench/tests.vh index 51d5c00b4..88a862d0e 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1936,6 +1936,7 @@ string arch64zbs[] = '{ string wally64priv[] = '{ `WALLYTEST, "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S", + "rv64i_m/privilege/src/WALLY-cboz-01.S", "rv64i_m/privilege/src/WALLY-cbom-01.S", "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S", "rv64i_m/privilege/src/WALLY-mie-01.S", @@ -2030,6 +2031,7 @@ string arch64zbs[] = '{ "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S", "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S", "rv32i_m/privilege/src/WALLY-cbom-01.S", + "rv32i_m/privilege/src/WALLY-cboz-01.S", "rv32i_m/privilege/src/WALLY-mie-01.S", "rv32i_m/privilege/src/WALLY-minfo-01.S", "rv32i_m/privilege/src/WALLY-misa-01.S", diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index 76c188413..aa9c8b50b 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -72,7 +72,7 @@ sretdone: li a0, 3 ecall # in M-mode li t0, 32 - csrs sip, t0 + csrs mip, t0 li a0, 1 ecall # in S-mode and expects stimer interrupt to occur li a0, 3 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag index 472157f0d..837668c3c 100644 --- 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag @@ -58,6 +58,7 @@ target_tests_nosim = \ WALLY-plic-01 \ WALLY-uart-01 \ WALLY-cbom-01 \ + WALLY-cboz-01 \ rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests)) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cbom-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cbom-01.reference_output new file mode 100644 index 000000000..faf3bf658 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cbom-01.reference_output @@ -0,0 +1,428 @@ +deadbeef # begin_signature +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef # destination 1 +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef 
+deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +00000000 # destination 2 +00000001 +00000002 +00000003 +00000004 +00000005 +00000006 +00000007 +00000008 +00000009 +0000000a +0000000b +0000000c +0000000d +0000000e +0000000f +00000010 +00000011 +00000012 +00000013 +00000014 +00000015 +00000016 +00000017 +00000018 +00000019 +0000001a +0000001b +0000001c +0000001d +0000001e +0000001f +00000020 +00000021 +00000022 +00000023 +00000024 +00000025 +00000026 +00000027 +00000028 +00000029 +0000002a +0000002b +0000002c +0000002d +0000002e +0000002f +00000030 +00000031 +00000032 +00000033 +00000034 +00000035 +00000036 +00000037 +00000038 +00000039 +0000003a +0000003b +0000003c +0000003d +0000003e +0000003f +00000040 +00000041 +00000042 +00000043 +00000044 +00000045 +00000046 +00000047 +00000048 +00000049 +0000004a +0000004b +0000004c +0000004d +0000004e +0000004f +00000050 +00000051 +00000052 +00000053 +00000054 +00000055 +00000056 +00000057 +00000058 +00000059 +0000005a +0000005b +0000005c +0000005d +0000005e +0000005f +00000060 +00000061 +00000062 +00000063 +00000064 +00000065 +00000066 +00000067 +00000068 +00000069 +0000006a +0000006b +0000006c +0000006d +0000006e +0000006f +00000070 +00000071 +00000072 +00000073 +00000074 +00000075 +00000076 +00000077 +00000078 +00000079 +0000007a +0000007b +0000007c +0000007d +0000007e +0000007f +00000000 # destination 3 +00000001 +00000002 +00000003 +00000004 +00000005 +00000006 +00000007 +00000008 +00000009 +0000000a +0000000b +0000000c +0000000d +0000000e +0000000f +00000010 +00000011 +00000012 +00000013 +00000014 +00000015 +00000016 +00000017 +00000018 +00000019 +0000001a +0000001b +0000001c +0000001d +0000001e +0000001f +00000020 +00000021 +00000022 +00000023 
+00000024 +00000025 +00000026 +00000027 +00000028 +00000029 +0000002a +0000002b +0000002c +0000002d +0000002e +0000002f +00000030 +00000031 +00000032 +00000033 +00000034 +00000035 +00000036 +00000037 +00000038 +00000039 +0000003a +0000003b +0000003c +0000003d +0000003e +0000003f +00000040 +00000041 +00000042 +00000043 +00000044 +00000045 +00000046 +00000047 +00000048 +00000049 +0000004a +0000004b +0000004c +0000004d +0000004e +0000004f +00000050 +00000051 +00000052 +00000053 +00000054 +00000055 +00000056 +00000057 +00000058 +00000059 +0000005a +0000005b +0000005c +0000005d +0000005e +0000005f +00000060 +00000061 +00000062 +00000063 +00000064 +00000065 +00000066 +00000067 +00000068 +00000069 +0000006a +0000006b +0000006c +0000006d +0000006e +0000006f +00000070 +00000071 +00000072 +00000073 +00000074 +00000075 +00000076 +00000077 +00000078 +00000079 +0000007a +0000007b +0000007c +0000007d +0000007e +0000007f +ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies. 
+ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +0bad0bad # controls +0bad0bad +0bad0bad diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cboz-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cboz-01.reference_output new file mode 100644 index 000000000..ef91aa32f --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cboz-01.reference_output @@ -0,0 +1,188 @@ +deadbeef # begin_signature +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +00000000 # destination 1 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 
+00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 # destination 2 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies. +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +0bad0bad +0bad0bad diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cbom-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cbom-01.S new file mode 100644 index 000000000..3c129b998 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cbom-01.S @@ -0,0 +1,472 @@ +/////////////////////////////////////////// +// +// WALLY-cache-management-tests +// invalidate, clean, and flush +// +// Author: Rose Thompson +// +// Created 18 August 2023 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# Purpose: Tests the 3 Zicbom cache instructions which all operate on cacheline +# granularity blocks of memory. Invalidate: Clears valid and dirty bits +# and does not write back. Clean: Writes back dirty cacheline if needed +# and clears dirty bit. Does NOT clear valid bit. Flush: Cleans and then +# Invalidates. These operations apply to all caches in the memory system. +# The tests are divided into three parts one for the data cache, instruction cache +# and checks to verify the uncached regions of memory cause exceptions. +# ----------- +# Copyright (c) 2020. RISC-V International. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# ----------- +# +# This assembly file tests the cbo.inval, cbo.clean, and cbo.flush instructions of the RISC-V Zicbom extension. +# + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV32I_Zicbom") +# Test code region +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.inval) + +RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n") + +CBOMTest: + # *** TODO + # first need to discover the length of the cacheline. + # for now assume it is 64 bytes + + #addi sp, sp, -16 + #sd s0, 0(sp) + #sd ra, 8(sp) + + la s0, signature + + ################################################################################ + # INVALIDATE D$ + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Invalidate the second region + # 4. Verify the second region has the original invalid data + # DON'T batch each step. We want to see the transition between cachelines. The current should be invalidated + # but the next should have the copied data. 
+ + # step 1 +CBOMTest_inval_step1: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcpy4 + + # step 2 +CBOMTest_inval_step2: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 +CBOMTest_inval_step3: + la a1, Destination1 + cbo.inval (a1) + # step 4 (should be Invalid) + la a0, DeadBeafData1 + la a1, Destination1 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 4 next line (should still be valid) +CBOMTest_inval_step4: + la a0, SourceData+64 + la a1, Destination1+64 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 (Invalidate all remaining lines) +CBOMTest_inval_step3_all: + la a1, Destination1+64 + cbo.inval (a1) + cbo.inval (a1) # verify invalidating an already non present line does not cause an issue. + la a1, Destination1+128 + cbo.inval (a1) + la a1, Destination1+192 + cbo.inval (a1) + la a1, Destination1+256 + cbo.inval (a1) + la a1, Destination1+320 + cbo.inval (a1) + la a1, Destination1+384 + cbo.inval (a1) + la a1, Destination1+448 + cbo.inval (a1) + + # step 4 All should be invalid +CBOMTest_inval_step4_all: + la a0, DeadBeafData1 + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Clean D$ + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Invalidate the second region + # 4. Verify the second region has the original invalid data + # 5. Repeat step 1 + # 6. Clean cachelines + # 7. Verify the second region has the same data + # 8. Invalidate the second region + # 9. 
Verify again but this time it should contain the same data + # DON'T batch each step. We want to see the transition between cachelines. The current should be invalidated + # but the next should have the copied data. + + # step 1 +CBOMTest_clean_step1: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcpy4 + + # step 2 +CBOMTest_clean_step2: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 +CBOMTest_clean_step3: + la a1, Destination2 + cbo.inval (a1) + la a1, Destination2+64 + cbo.inval (a1) + la a1, Destination2+128 + cbo.inval (a1) + la a1, Destination2+192 + cbo.inval (a1) + la a1, Destination2+256 + cbo.inval (a1) + la a1, Destination2+320 + cbo.inval (a1) + la a1, Destination2+384 + cbo.inval (a1) + la a1, Destination2+448 + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + + # step 4 All should be invalid +CBOMTest_clean_step4: + la a0, DeadBeafData1 + la a1, Destination2 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 5 +CBOMTest_clean_step5: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcpy4 + + # step 6 only clean 1 line +CBOMTest_clean_step6: + la a1, Destination2 + cbo.clean (a1) + + # step 7 only check that 1 line +CBOMTest_clean_step7: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 8 invalidate that 1 line and the next +CBOMTest_clean_step8: + la a1, Destination2 + cbo.inval (a1) + la a1, Destination2+64 + cbo.inval (a1) + + # step 9 that 1 line should contain the valid data +CBOMTest_clean_step9_line1: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 9 the next should contain the invalid data 
+CBOMTest_clean_step9_line2: + la a0, DeadBeafData1 + la a1, Destination2+64 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 5 # now recopy the one we just corrupted +CBOMTest_clean_step5_recopy_line2: + la a0, SourceData+64 + la a1, Destination2+64 + li a2, 16 + jal ra, memcpy4 + + # step 6 # clean the remaining +CBOMTest_clean_step6_clean_all: + la a1, Destination2+64 + cbo.clean (a1) + la a1, Destination2+128 + cbo.clean (a1) + la a1, Destination2+192 + cbo.clean (a1) + la a1, Destination2+256 + cbo.clean (a1) + la a1, Destination2+320 + cbo.clean (a1) + la a1, Destination2+384 + cbo.clean (a1) + la a1, Destination2+448 + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + + # step 8 # invalidate all remaining +CBOMTest_clean_step7_invalidate_all: + la a1, Destination2 + cbo.inval (a1) + la a1, Destination2+64 + cbo.inval (a1) + la a1, Destination2+128 + cbo.inval (a1) + la a1, Destination2+192 + cbo.inval (a1) + la a1, Destination2+256 + cbo.inval (a1) + la a1, Destination2+320 + cbo.inval (a1) + la a1, Destination2+384 + cbo.inval (a1) + la a1, Destination2+448 + cbo.inval (a1) + + # step 9 # check all +CBOMTest_clean_step9_check_all: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Flush D$ line + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. For flush there is no way to create a negative control. We will flush 1 cache line + # 4. Verify whole region + # 5. Flush the remaining lines + # 6. 
Verify whole region + + # step 1 +CBOMTest_flush_step1: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcpy4 + + # step 2 All should be valid +CBOMTest_flush_step2_verify: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 # flush 1 line +CBOMTest_flush_step3: + la a1, Destination3 + cbo.flush (a1) + + # step 4 +CBOMTest_flush_step4_verify: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 5 +CBOMTest_flush_step5_flush_all: + la a1, Destination3 + cbo.flush (a1) + la a1, Destination3+64 + cbo.flush (a1) + la a1, Destination3+128 + cbo.flush (a1) + la a1, Destination3+192 + cbo.flush (a1) + la a1, Destination3+256 + cbo.flush (a1) + la a1, Destination3+320 + cbo.flush (a1) + la a1, Destination3+384 + cbo.flush (a1) + la a1, Destination3+448 + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + + # step 6 +CBOMTest_flush_step6_verify: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + + #lw s0, 0(sp) + #lw ra, 8(sp) + #addi sp, sp, 16 + #ret +RVMODEL_HALT + + +.type memcpy4, @function +memcpy4: + # a0 is the source + # a1 is the dst + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy4_loop: + lw t3, 0(t0) + sw t3, 0(t1) + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcpy4_loop + ret + +.type memcmp4, @function +# returns which index mismatch, -1 if none +memcmp4: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp4_loop: + lw t3, 0(t0) + lw t4, 0(t1) + bne t3, t4, memcmp4_ne + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcmp4_loop + li a0, -1 + ret +memcmp4_ne: + mv a0, t2 + ret + +RVTEST_CODE_END + + +RVTEST_DATA_BEGIN +# Input 
data section. +#.data +.align 7 + +DeadBeafData1: + .fill 128, 4, 0xdeadbeef +SourceData: + .int 0, 1, 2, 3, 4, 5, 6, 7 + .int 8, 9, 10, 11, 12, 13, 14, 15 + .int 16, 17, 18, 19, 20, 21, 22, 23 + .int 24, 25, 26, 27, 28, 29, 30, 31 + .int 32, 33, 34, 35, 36, 37, 38, 39 + .int 40, 41, 42, 43, 44, 45, 46, 47 + .int 48, 49, 50, 51, 52, 53, 54, 55 + .int 56, 57, 58, 59, 60, 61, 62, 63 + .int 64, 65, 66, 67, 68, 69, 70, 71 + .int 72, 73, 74, 75, 76, 77, 78, 79 + .int 80, 81, 82, 83, 84, 85, 86, 87 + .int 88, 89, 90, 91, 92, 93, 94, 95 + .int 96, 97, 98, 99, 100, 101, 102, 103 + .int 104, 105, 106, 107, 108, 109, 110, 111 + .int 112, 113, 114, 115, 116, 117, 118, 119 + .int 120, 121, 122, 123, 124, 125, 126, 127 + +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + .fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert + # 4 bytes. This needs to be aligned to a cacheline + + .align 6 +Destination1: + .fill 128, 4, 0xdeadbeef +Destination2: + .fill 128, 4, 0xdeadbeef +Destination3: + .fill 128, 4, 0xdeadbeef +signature: + .fill 16, 4, 0x0bad0bad + +RVMODEL_DATA_END + diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cboz-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cboz-01.S new file mode 100644 index 000000000..207c727ec --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cboz-01.S @@ -0,0 +1,377 @@ +/////////////////////////////////////////// +// +// WALLY-cache-management-tests +// cbo.zero (cache block zero) +// +// Author: Rose Thompson +// +// Created 22 August 2023 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# Purpose: Tests the Zicboz cache instruction cbo.zero, which operates on cacheline +# granularity blocks of memory. The instruction cbo.zero allocates a cacheline +# and writes 0 to each byte. A dirty cacheline is overwritten, and any data in main +# memory is overwritten. +# ----------- +# Copyright (c) 2020. 
RISC-V International. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------- +# +# This assembly file tests the cbo.zero instruction of the RISC-V Zicboz extension; cbo.flush from Zicbom is used to set up the miss case. +# + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV32I_Zicboz_Zicbom") +# Test code region +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.zero) + +RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n") + +CBOZTest: + # *** TODO + # first need to discover the length of the cacheline. + # for now assume it is 64 bytes + + #addi sp, sp, -16 + #sd s0, 0(sp) + #sd ra, 8(sp) + + la s0, signature # s0 walks the signature; every check below stores one result word and advances it + + ################################################################################ + # Zero cache line hit overwrites + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Zero that region of memory + # 4. Verify the second region is all zero. 
+ + # step 1 (128 words = 512 bytes, i.e. 8 lines at the assumed 64-byte line size) +CBOZTest_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcpy4 + + # step 2 +CBOZTest_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 (one cbo.zero per 64-byte line across the 512-byte region) +CBOZTest_zero_step3: + la a1, Destination1 + cbo.zero (a1) + la a1, Destination1+64 + cbo.zero (a1) + la a1, Destination1+128 + cbo.zero (a1) + la a1, Destination1+192 + cbo.zero (a1) + la a1, Destination1+256 + cbo.zero (a1) + la a1, Destination1+320 + cbo.zero (a1) + la a1, Destination1+384 + cbo.zero (a1) + la a1, Destination1+448 + cbo.zero (a1) + +CBOZTest_zero_step4: + # step 4 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Verify cbo.zero miss overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Flush that one line + # 4. Zero that one line + # 5. 
Verify the second region is zero + + # step 1 +CBOZTest_miss_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 16 + jal ra, memcpy4 + + # step 2 +CBOZTest_miss_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # steps 3 and 4 (flush the line, then zero it) +CBOZTest_miss_zero_step3: + la a1, Destination1 + cbo.flush (a1) + cbo.zero (a1) + +CBOZTest_miss_zero_step4: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Verify cbo.zero miss with eviction overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Repeat 1 four more times at 4KiB intervals + # 3. Then verify the second region has the same data + # 4. Zero each line + # 5. 
Verify the second region is zero + + # steps 1 and 2 (each copy starts 8 bytes further into SourceData) +CBOZTest_eviction_zero_step1_0: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 16 + jal ra, memcpy4 + + # step 3 +CBOZTest_eviction_zero_step3_0: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 4 +CBOZTest_eviction_zero_step4: + la a1, Destination2 + cbo.zero (a1) + la a1, Destination2+4096 + cbo.zero (a1) + la a1, Destination2+8192 + cbo.zero (a1) + la a1, Destination2+12288 + cbo.zero (a1) + la a1, Destination2+16384 + cbo.zero (a1) + +CBOZTest_eviction_zero_step5_0: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_4096: + # step 5 (should be zero) + la a0, ZeroData + la a1, 
Destination2+4096 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_8192: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+8192 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_12288: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+12288 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_16384: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+16384 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + + #ld s0, 0(sp) + #ld ra, 8(sp) + #addi sp, sp, 16 + #ret +RVMODEL_HALT + + +.type memcpy4, @function +memcpy4: + # a0 is the source + # a1 is the dst + # a2 is the number of 4 byte words (the count is tested after each copy, so at least one word is always copied) + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy4_loop: + lw t3, 0(t0) + sw t3, 0(t1) + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcpy4_loop + ret + +.type memcmp4, @function +# returns in a0 the word index of the first mismatch, or -1 if all compared words match +memcmp4: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp4_loop: + lw t3, 0(t0) + lw t4, 0(t1) + bne t3, t4, memcmp4_ne + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcmp4_loop + li a0, -1 + ret +memcmp4_ne: + mv a0, t2 + ret + +RVTEST_CODE_END + + +RVTEST_DATA_BEGIN +# Input data section. 
+#.data +.align 7 + +ZeroData: + .fill 128, 4, 0x0 # all-zero reference that zeroed lines are compared against +SourceData: + .int 0, 1, 2, 3, 4, 5, 6, 7 + .int 8, 9, 10, 11, 12, 13, 14, 15 + .int 16, 17, 18, 19, 20, 21, 22, 23 + .int 24, 25, 26, 27, 28, 29, 30, 31 + .int 32, 33, 34, 35, 36, 37, 38, 39 + .int 40, 41, 42, 43, 44, 45, 46, 47 + .int 48, 49, 50, 51, 52, 53, 54, 55 + .int 56, 57, 58, 59, 60, 61, 62, 63 + .int 64, 65, 66, 67, 68, 69, 70, 71 + .int 72, 73, 74, 75, 76, 77, 78, 79 + .int 80, 81, 82, 83, 84, 85, 86, 87 + .int 88, 89, 90, 91, 92, 93, 94, 95 + .int 96, 97, 98, 99, 100, 101, 102, 103 + .int 104, 105, 106, 107, 108, 109, 110, 111 + .int 112, 113, 114, 115, 116, 117, 118, 119 + .int 120, 121, 122, 123, 124, 125, 126, 127 + +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + .fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert + # 4 bytes. This needs to be aligned to a cacheline + + .align 6 +Destination1: + .fill 128, 4, 0xdeadbeef +Destination2: + .fill 16, 4, 0xdeadbeef # 64 bytes; NOTE(review): the eviction test also accesses Destination2+4096..+16384, which lies past this buffer and the signature - confirm that memory is safe to use +signature: + .fill 16, 4, 0x0bad0bad # 16 result words; the test above stores 14 of them + +RVMODEL_DATA_END + diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag index 6b13612ce..bc5f454bb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag @@ -57,6 +57,7 @@ target_tests_nosim = \ WALLY-status-fp-enabled-01 \ WALLY-wfi-01 \ WALLY-cbom-01 \ + WALLY-cboz-01 \ # unclear why status-fp-enabled and wfi aren't simulating ok diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output new file mode 100644 index 000000000..644fa6f0b --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output @@ -0,0 +1,204 @@ +deadbeef # begin_signature 
+deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +00000000 # destination 1 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 # destination 2 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies. 
+ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +0bad0bad +0bad0bad +0bad0bad +0bad0bad diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S new file mode 100644 index 000000000..22b076261 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S @@ -0,0 +1,377 @@ +/////////////////////////////////////////// +// +// WALLY-cache-management-tests +// cbo.zero (cache block zero) +// +// Author: Rose Thompson +// +// Created 22 August 2023 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# Purpose: Tests the Zicboz cache instruction cbo.zero, which operates on cacheline +# granularity blocks of memory. The instruction cbo.zero allocates a cacheline +# and writes 0 to each byte. A dirty cacheline is overwritten, and any data in main +# memory is overwritten. +# ----------- +# Copyright (c) 2020. RISC-V International. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------- +# +# This assembly file tests the cbo.zero instruction of the RISC-V Zicboz extension; cbo.flush from Zicbom is used to set up the miss case. +# + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV64I_Zicboz_Zicbom") +# Test code region +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.zero) + +RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n") + +CBOZTest: + # *** TODO + # first need to discover the length of the cacheline. 
+ # for now assume it is 64 bytes + + #addi sp, sp, -16 + #sd s0, 0(sp) + #sd ra, 8(sp) + + la s0, signature # s0 walks the signature; every check below stores one result doubleword and advances it + + ################################################################################ + # Zero cache line hit overwrites + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Zero that region of memory + # 4. Verify the second region is all zero. + + # step 1 (64 doublewords = 512 bytes, i.e. 8 lines at the assumed 64-byte line size) +CBOZTest_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 64 + jal ra, memcpy8 + + # step 2 +CBOZTest_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 64 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # step 3 (one cbo.zero per 64-byte line across the 512-byte region) +CBOZTest_zero_step3: + la a1, Destination1 + cbo.zero (a1) + la a1, Destination1+64 + cbo.zero (a1) + la a1, Destination1+128 + cbo.zero (a1) + la a1, Destination1+192 + cbo.zero (a1) + la a1, Destination1+256 + cbo.zero (a1) + la a1, Destination1+320 + cbo.zero (a1) + la a1, Destination1+384 + cbo.zero (a1) + la a1, Destination1+448 + cbo.zero (a1) + +CBOZTest_zero_step4: + # step 4 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 64 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + ################################################################################ + # Verify cbo.zero miss overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Flush that one line + # 4. Zero that one line + # 5. 
Verify the second region is zero + + # step 1 +CBOZTest_miss_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 8 + jal ra, memcpy8 + + # step 2 +CBOZTest_miss_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # steps 3 and 4 (flush the line, then zero it) +CBOZTest_miss_zero_step3: + la a1, Destination1 + cbo.flush (a1) + cbo.zero (a1) + +CBOZTest_miss_zero_step4: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + ################################################################################ + # Verify cbo.zero miss with eviction overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Repeat 1 four more times at 4KiB intervals + # 3. Then verify the second region has the same data + # 4. Zero each line + # 5. 
Verify the second region is zero + + # steps 1 and 2 (each copy starts 8 bytes further into SourceData) +CBOZTest_eviction_zero_step1_0: + la a0, SourceData + la a1, Destination2 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 8 + jal ra, memcpy8 + + # step 3 +CBOZTest_eviction_zero_step3_0: + la a0, SourceData + la a1, Destination2 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # step 4 +CBOZTest_eviction_zero_step4: + la a1, Destination2 + cbo.zero (a1) + la a1, Destination2+4096 + cbo.zero (a1) + la a1, Destination2+8192 + cbo.zero (a1) + la a1, Destination2+12288 + cbo.zero (a1) + la a1, Destination2+16384 + cbo.zero (a1) + +CBOZTest_eviction_zero_step5_0: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_4096: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+4096 + li a2, 
8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_8192: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+8192 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_12288: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+12288 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_16384: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+16384 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + + #ld s0, 0(sp) + #ld ra, 8(sp) + #addi sp, sp, 16 + #ret +RVMODEL_HALT + + +.type memcpy8, @function +memcpy8: + # a0 is the source + # a1 is the dst + # a2 is the number of 8 byte words (the count is tested after each copy, so at least one doubleword is always copied) + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy8_loop: + ld t3, 0(t0) + sd t3, 0(t1) + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_loop + ret + +.type memcmp8, @function +# returns in a0 the doubleword index of the first mismatch, or -1 if all compared doublewords match +memcmp8: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp8_loop: + ld t3, 0(t0) + ld t4, 0(t1) + bne t3, t4, memcmp8_ne + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcmp8_loop + li a0, -1 + ret +memcmp8_ne: + mv a0, t2 + ret + +RVTEST_CODE_END + + +RVTEST_DATA_BEGIN +# Input data section. 
+#.data +.align 7 + +ZeroData: + .fill 128, 4, 0x0 # all-zero reference that zeroed lines are compared against +SourceData: + .int 0, 1, 2, 3, 4, 5, 6, 7 + .int 8, 9, 10, 11, 12, 13, 14, 15 + .int 16, 17, 18, 19, 20, 21, 22, 23 + .int 24, 25, 26, 27, 28, 29, 30, 31 + .int 32, 33, 34, 35, 36, 37, 38, 39 + .int 40, 41, 42, 43, 44, 45, 46, 47 + .int 48, 49, 50, 51, 52, 53, 54, 55 + .int 56, 57, 58, 59, 60, 61, 62, 63 + .int 64, 65, 66, 67, 68, 69, 70, 71 + .int 72, 73, 74, 75, 76, 77, 78, 79 + .int 80, 81, 82, 83, 84, 85, 86, 87 + .int 88, 89, 90, 91, 92, 93, 94, 95 + .int 96, 97, 98, 99, 100, 101, 102, 103 + .int 104, 105, 106, 107, 108, 109, 110, 111 + .int 112, 113, 114, 115, 116, 117, 118, 119 + .int 120, 121, 122, 123, 124, 125, 126, 127 + +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + .fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert + # 4 bytes. This needs to be aligned to a cacheline + + .align 6 +Destination1: + .fill 128, 4, 0xdeadbeef +Destination2: + .fill 16, 4, 0xdeadbeef # 64 bytes; NOTE(review): the eviction test also accesses Destination2+4096..+16384, which lies past this buffer and the signature - confirm that memory is safe to use +signature: + .fill 32, 4, 0x0bad0bad # 128 bytes = 16 result doublewords; the test above stores 14 of them + +RVMODEL_DATA_END +