/////////////////////////////////////////// // cache (data cache) // // Written: ross1728@gmail.com July 07, 2021 // Implements the L1 data cache // // Purpose: Storage for data and meta data. // // A component of the Wally configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University // // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // // Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file // except in compliance with the License, or, at your option, the Apache License version 2.0. You // may obtain a copy of the License at // // https://solderpad.org/licenses/SHL-2.1/ // // Unless required by applicable law or agreed to in writing, any work distributed under the // License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) ( input logic clk, input logic reset, input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) // cpu side input logic [1:0] CacheRW, // [1] Read, [0] Write input logic [1:0] CacheAtomic, // Atomic operation input logic FlushCache, // Flush all dirty lines back to memory input logic InvalidateCache, // Clear all valid bits input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] PAdr, // Physical address input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only) input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only) output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted output logic CacheStall, // Cache stalls pipeline during multicycle operation output logic [WORDLEN-1:0] ReadDataWord, // Word read from cache (goes to CPU and bus) // to performance counters to cpu output logic CacheMiss, // Cache miss output logic CacheAccess, // Cache access // lsu control input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr // Bus fsm interface input logic CacheBusAck, // Bus operation completed input logic SelBusBeat, // Word in cache line comes from BeatCount input logic [LOGBWPL-1:0] BeatCount, // Beat in burst input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus output logic [1:0] CacheBusRW, // [1] Read or [0] write bus output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access ); // Cache parameters localparam LINEBYTELEN = LINELEN/8; // Line length in bytes localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field localparam SETLEN = $clog2(NUMLINES); // Number of set bits localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits localparam WORDSPERLINE = LINELEN/WORDLEN; // Number of words in cache line localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete localparam LOGLLENBYTES = $clog2(WORDLEN/8); // Number of bits to address a word localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; // *** see if this is the same as WORDSPERLINE localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); // *** logic SelAdr; logic [1:0] AdrSelMuxSel; logic [SETLEN-1:0] CAdr; logic [LINELEN-1:0] LineWriteData; logic ClearValid, ClearDirty, SetDirty, SetValid; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] HitWay, ValidWay; logic CacheHit; logic [NUMWAYS-1:0] VictimWay, DirtyWay; logic LineDirty; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; logic [TAGLEN-1:0] Tag; logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1; logic FlushAdrCntEn, FlushCntRst; logic FlushAdrFlag, FlushWayFlag; logic [NUMWAYS-1:0] FlushWay, NextFlushWay; logic FlushWayCntEn; logic SelWriteback; logic LRUWriteEn; logic SelFlush; logic ResetOrFlushCntRst; logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic SelFetchBuffer; logic CacheEn; logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; genvar index; ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path ///////////////////////////////////////////////////////////////////////////////////////////// // Choose read address (CAdr). Normally use NextAdr, but use PAdr during stalls // and FlushAdr when handling D$ flushes // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap // sets PCNextF to XTVEC and the icache must start reading the instruction. assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}; mux3 #(SETLEN) AdrSelMux(.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr), .s(AdrSelMuxSel), .y(CAdr)); // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches if(NUMWAYS > 1) begin:vict cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU( .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage), .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache); end else assign VictimWay = 1'b1; // one hot. assign CacheHit = |HitWay; assign LineDirty = |DirtyWay; // ReadDataLineWay is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache)); or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag)); // Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines if(DCACHE) mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), .d1(BeatCount), .s(SelBusBeat), .y(WordOffsetAddr)); else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; // Bypass cache array to save a cycle when finishing a load miss mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine); // Select word from cache line subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread( .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord); // Bus address for fetch, writeback, or flush writeback mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path ///////////////////////////////////////////////////////////////////////////////////////////// // Adjust byte mask from word to cache line onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); for(index = 0; index < 2**LOGCWPL; index++) begin assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; end assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; // Merge write data into fetched cache line for store miss for(index = 0; index < LINELEN/8; index++) begin mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index])); end ///////////////////////////////////////////////////////////////////////////////////////////// // Flush logic ///////////////////////////////////////////////////////////////////////////////////////////// // Flush address (line number) assign ResetOrFlushCntRst = reset | FlushCntRst; flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr); mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr); assign FlushAdrP1 = NextFlushAdr + 1'b1; assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]); // Flush way flopenl #(NUMWAYS) FlushWayReg(clk, ResetOrFlushCntRst, FlushWayCntEn, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay); if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; else assign NextFlushWay = FlushWay[NUMWAYS-1]; assign FlushWayFlag = FlushWay[NUMWAYS-1]; ///////////////////////////////////////////////////////////////////////////////////////////// // Cache FSM ///////////////////////////////////////////////////////////////////////////////////////////// cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .CacheAtomic, .Stall, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CacheEn, .LRUWriteEn); endmodule