cvw/pipelined/src/cache/cache.sv
2022-12-22 10:04:50 -08:00

207 lines
11 KiB
Systemverilog

///////////////////////////////////////////
// cache (data cache)
//
// Written: ross1728@gmail.com July 07, 2021
// Implements the L1 cache (data cache, or instruction cache when the DCACHE parameter is 0)
//
// Purpose: Storage for data and meta data.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// cache: top level of a parameterized, set-associative L1 cache.
//
// Instantiates NUMWAYS cacheway arrays, victim selection (cacheLRU, only when
// NUMWAYS > 1), the read-word select, the write-merge datapath, the flush
// address/way counters, and the controlling cachefsm.
//
// Parameters:
//   LINELEN     - cache line length in bits
//   NUMLINES    - number of lines per way (SETLEN = $clog2(NUMLINES))
//   NUMWAYS     - number of ways
//   LOGBWPL     - width of BeatCount
//   WORDLEN     - width in bits of the word exchanged with the CPU
//   MUXINTERVAL - forwarded to subcachelineread; $clog2(MUXINTERVAL/8) is the
//                 lowest PAdr bit used for the word offset
//   DCACHE      - nonzero for a data cache; 0 selects the instruction-cache
//                 variants of the address-select and word-offset logic
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
  input  logic                   clk,
  input  logic                   reset,
  input  logic                   Stall,           // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
  input  logic                   FlushStage,      // Pipeline flush of second stage (prevent writes and bus operations)
  // cpu side
  input  logic [1:0]             CacheRW,         // [1] Read, [0] Write
  input  logic [1:0]             CacheAtomic,     // Atomic operation
  input  logic                   FlushCache,      // Flush all dirty lines back to memory
  input  logic                   InvalidateCache, // Clear all valid bits
  input  logic [11:0]            NextAdr,         // Virtual address, but we only use the lower 12 bits.
  input  logic [`PA_BITS-1:0]    PAdr,            // Physical address
  input  logic [(WORDLEN-1)/8:0] ByteMask,        // Which bytes to write (D$ only)
  input  logic [WORDLEN-1:0]     CacheWriteData,  // Data to write to cache (D$ only)
  output logic                   CacheCommitted,  // Cache has started bus operation that shouldn't be interrupted
  output logic                   CacheStall,      // Cache stalls pipeline during multicycle operation
  output logic [WORDLEN-1:0]     ReadDataWord,    // Word read from cache (goes to CPU and bus)
  // to performance counters to cpu
  output logic                   CacheMiss,       // Cache miss
  output logic                   CacheAccess,     // Cache access
  // lsu control
  input  logic                   SelHPTW,         // Use PAdr from Hardware Page Table Walker rather than NextAdr
  // Bus fsm interface
  input  logic                   CacheBusAck,     // Bus operation completed
  input  logic                   SelBusBeat,      // Word in cache line comes from BeatCount
  input  logic [LOGBWPL-1:0]     BeatCount,       // Beat in burst
  input  logic [LINELEN-1:0]     FetchBuffer,     // Buffer long enough to hold entire cache line arriving from bus
  output logic [1:0]             CacheBusRW,      // [1] Read or [0] write bus
  output logic [`PA_BITS-1:0]    CacheBusAdr      // Address for bus access
);

  // Cache parameters
  localparam LINEBYTELEN       = LINELEN/8;             // Line length in bytes
  localparam OFFSETLEN         = $clog2(LINEBYTELEN);   // Number of bits in offset field
  localparam SETLEN            = $clog2(NUMLINES);      // Number of set bits
  localparam SETTOP            = SETLEN+OFFSETLEN;      // Number of set plus offset bits
  localparam TAGLEN            = `PA_BITS - SETTOP;     // Number of tag bits
  localparam WORDSPERLINE      = LINELEN/WORDLEN;       // Number of words in cache line
  localparam FLUSHADRTHRESHOLD = NUMLINES - 1;          // Used to determine when flush is complete
  localparam LOGLLENBYTES      = $clog2(WORDLEN/8);     // Number of bits to address a word
  // Derived from this instance's LINELEN rather than the global D$ line-length
  // macro, so the byte-mask decoder below always matches the LINELEN/8-bit
  // DemuxedByteMask regardless of which cache this module is instantiated as.
  localparam CACHEWORDSPERLINE = WORDSPERLINE;          // Number of words in cache line
  localparam LOGCWPL           = $clog2(CACHEWORDSPERLINE); // Log2 of CACHEWORDSPERLINE

  logic                         SelAdr;
  logic [1:0]                   AdrSelMuxSel;
  logic [SETLEN-1:0]            CAdr;
  logic [LINELEN-1:0]           LineWriteData;
  logic                         ClearValid, ClearDirty, SetDirty, SetValid;
  logic [LINELEN-1:0]           ReadDataLineWay [NUMWAYS-1:0];
  logic [NUMWAYS-1:0]           HitWay, ValidWay;
  logic                         CacheHit;
  logic [NUMWAYS-1:0]           VictimWay, DirtyWay;
  logic                         LineDirty;
  logic [TAGLEN-1:0]            TagWay [NUMWAYS-1:0];
  logic [TAGLEN-1:0]            Tag;
  logic [SETLEN-1:0]            FlushAdr, NextFlushAdr, FlushAdrP1;
  logic                         FlushAdrCntEn, FlushCntRst;
  logic                         FlushAdrFlag, FlushWayFlag;
  logic [NUMWAYS-1:0]           FlushWay, NextFlushWay;
  logic                         FlushWayCntEn;
  logic                         SelWriteback;
  logic                         LRUWriteEn;
  logic                         SelFlush;
  logic                         ResetOrFlushCntRst;
  logic [LINELEN-1:0]           ReadDataLine, ReadDataLineCache;
  logic                         SelFetchBuffer;
  logic                         CacheEn;
  logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
  logic [LINELEN/8-1:0]         LineByteMask, DemuxedByteMask, FetchBufferByteSel;
  logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;

  genvar index;

  /////////////////////////////////////////////////////////////////////////////////////////////
  // Read Path
  /////////////////////////////////////////////////////////////////////////////////////////////

  // Choose read address (CAdr). Normally use NextAdr, but use PAdr during stalls
  // and FlushAdr when handling D$ flushes
  // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap
  // sets PCNextF to XTVEC and the icache must start reading the instruction.
  // For DCACHE == 0, FlushStage therefore forces the low select bit to 0.
  assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))};
  mux3 #(SETLEN) AdrSelMux(.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr),
    .s(AdrSelMuxSel), .y(CAdr));

  // Array of cache ways, along with victim, hit, dirty, and read merging logic
  cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0](
    .clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
    .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
    .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);

  // Select victim way for associative caches
  if(NUMWAYS > 1) begin:vict
    // LRU state is not updated while the stage is being flushed
    cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
      .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
      .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
  end else
    assign VictimWay = 1'b1; // one hot. With a single way that way is always the victim.

  assign CacheHit  = |HitWay;   // hit in any way
  assign LineDirty = |DirtyWay; // any way reports dirty

  // ReadDataLineWay is a 2d array of cache line len by number of ways.
  // Need to OR together each way in a bitwise manner.
  // Final part of the AO Mux. First is the AND in the cacheway.
  or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache));
  or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag));

  // Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines
  if(DCACHE)
    mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
      .d1(BeatCount), .s(SelBusBeat),
      .y(WordOffsetAddr));
  else
    assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];

  // Bypass cache array to save a cycle when finishing a load miss
  mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine);

  // Select word from cache line
  subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread(
    .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord);

  // Bus address for fetch, writeback, or flush writeback
  //   d0: line-aligned PAdr (fetch)
  //   d1: stored Tag with the set index from PAdr (writeback)
  //   d2: stored Tag with the flush counter as set index (flush writeback)
  mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
    .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
    .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
    .s({SelFlush, SelWriteback}), .y(CacheBusAdr));

  /////////////////////////////////////////////////////////////////////////////////////////////
  // Write Path
  /////////////////////////////////////////////////////////////////////////////////////////////

  // Adjust byte mask from word to cache line:
  // decode the word index within the line, then place ByteMask in that word's
  // byte lanes and zero in every other word's lanes.
  onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
  for(index = 0; index < 2**LOGCWPL; index++) begin
    assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
  end

  // Per-byte select between FetchBuffer (1) and CacheWriteData (0)
  assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask;  // If load miss set all muxes to 1.
  // Per-byte line write mask: whole line when SetValid, only the masked bytes
  // when just SetDirty, nothing otherwise
  assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;

  // Merge write data into fetched cache line for store miss
  // CacheWriteData is replicated across every word of the line ((8*index)%WORDLEN);
  // FetchBufferByteSel picks which bytes actually come from it.
  for(index = 0; index < LINELEN/8; index++) begin
    mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
      .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
  end

  /////////////////////////////////////////////////////////////////////////////////////////////
  // Flush logic
  /////////////////////////////////////////////////////////////////////////////////////////////

  // Flush address (line number)
  // NextFlushAdr is the registered counter; FlushAdr bypasses the register with
  // the incremented value while FlushAdrCntEn is asserted.
  assign ResetOrFlushCntRst = reset | FlushCntRst;
  flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr);
  mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr);
  assign FlushAdrP1 = NextFlushAdr + 1'b1;
  assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]); // counter is at the last line

  // Flush way
  // One-hot way pointer, loaded with way 0 and rotated left by one on each enable
  flopenl #(NUMWAYS) FlushWayReg(clk, ResetOrFlushCntRst, FlushWayCntEn, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay);
  if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]};
  else assign NextFlushWay = FlushWay[NUMWAYS-1];
  assign FlushWayFlag = FlushWay[NUMWAYS-1]; // pointer is at the last way

  /////////////////////////////////////////////////////////////////////////////////////////////
  // Cache FSM
  /////////////////////////////////////////////////////////////////////////////////////////////

  cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck,
    .FlushStage, .CacheRW, .CacheAtomic, .Stall,
    .CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
    .CacheMiss, .CacheAccess, .SelAdr,
    .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush,
    .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
    .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
    .InvalidateCache, .CacheEn, .LRUWriteEn);
endmodule