Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2023-01-09 00:18:11 -06:00
commit 816006ac1b

View File

@ -1,114 +1,102 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// cache (data cache) // cache
// //
// Written: ross1728@gmail.com July 07, 2021 // Written: ross1728@gmail.com July 07, 2021
// Implements the L1 data cache // Implements the L1 instruction/data cache
// //
// Purpose: Storage for data and meta data. // Purpose: Storage for data and meta data.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
// //
// MIT LICENSE // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
// //
// The above copyright notice and this permission notice shall be included in all copies or // Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// substantial portions of the Software. // except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, // https://solderpad.org/licenses/SHL-2.1/
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR //
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS // Unless required by applicable law or agreed to in writing, any work distributed under the
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // either express or implied. See the License for the specific language governing permissions
// OR OTHER DEALINGS IN THE SOFTWARE. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) ( module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
input logic clk, input logic clk,
input logic reset, input logic reset,
// cpu side input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
input logic FlushStage, input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
input logic Stall, // cpu side
input logic [1:0] CacheRW, input logic [1:0] CacheRW, // [1] Read, [0] Write
input logic [1:0] CacheAtomic, input logic [1:0] CacheAtomic, // Atomic operation
input logic FlushCache, input logic FlushCache, // Flush all dirty lines back to memory
input logic InvalidateCache, input logic InvalidateCache, // Clear all valid bits
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] PAdr, // physical address input logic [`PA_BITS-1:0] PAdr, // Physical address
input logic [(WORDLEN-1)/8:0] ByteMask, input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only)
input logic [WORDLEN-1:0] CacheWriteData, input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only)
output logic CacheCommitted, output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted
output logic CacheStall, output logic CacheStall, // Cache stalls pipeline during multicycle operation
// to performance counters to cpu output logic [WORDLEN-1:0] ReadDataWord, // Word read from cache (goes to CPU and bus)
output logic CacheMiss, // to performance counters to cpu
output logic CacheAccess, output logic CacheMiss, // Cache miss
// lsu control output logic CacheAccess, // Cache access
input logic SelHPTW, // lsu control
// Bus fsm interface input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr
output logic [1:0] CacheBusRW, // Bus fsm interface
input logic CacheBusAck, input logic CacheBusAck, // Bus operation completed
input logic SelBusBeat, input logic SelBusBeat, // Word in cache line comes from BeatCount
input logic [LOGBWPL-1:0] BeatCount, input logic [LOGBWPL-1:0] BeatCount, // Beat in burst
input logic [LINELEN-1:0] FetchBuffer, input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus
output logic [`PA_BITS-1:0] CacheBusAdr, output logic [1:0] CacheBusRW, // [1] Read or [0] write bus
output logic [WORDLEN-1:0] ReadDataWord); output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access
);
// Cache parameters // Cache parameters
localparam LINEBYTELEN = LINELEN/8; localparam LINEBYTELEN = LINELEN/8; // Line length in bytes
localparam OFFSETLEN = $clog2(LINEBYTELEN); localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field
localparam SETLEN = $clog2(NUMLINES); localparam SETLEN = $clog2(NUMLINES); // Number of set bits
localparam SETTOP = SETLEN+OFFSETLEN; localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits
localparam TAGLEN = `PA_BITS - SETTOP; localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits
localparam WORDSPERLINE = LINELEN/WORDLEN; localparam WORDSPERLINE = LINELEN/WORDLEN; // Number of words in cache line
localparam FlushAdrThreshold = NUMLINES - 1; localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete
localparam LOGLLENBYTES = $clog2(WORDLEN/8); // Number of low address bits that select a byte within a word (log2 of bytes per word)
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; // Words per line, computed from the DCACHE_LINELENINBITS macro rather than the LINELEN parameter. *** see if this is the same as WORDSPERLINE
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); // Number of address bits that select a word within a line. *** depends on the CACHEWORDSPERLINE question above
logic SelAdr; logic SelAdr;
logic [SETLEN-1:0] CAdr; logic [1:0] AdrSelMuxSel;
logic [LINELEN-1:0] LineWriteData; logic [SETLEN-1:0] CAdr;
logic ClearValid; logic [LINELEN-1:0] LineWriteData;
logic ClearDirty; logic ClearValid, ClearDirty, SetDirty, SetValid;
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
logic [NUMWAYS-1:0] HitWay, ValidWay; logic [NUMWAYS-1:0] HitWay, ValidWay;
logic CacheHit; logic CacheHit;
logic SetDirty; logic [NUMWAYS-1:0] VictimWay, DirtyWay;
logic SetValid; logic LineDirty;
logic [NUMWAYS-1:0] VictimWay; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0];
logic [NUMWAYS-1:0] DirtyWay; logic [TAGLEN-1:0] Tag;
logic LineDirty; logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1;
logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; logic FlushAdrCntEn, FlushCntRst;
logic [TAGLEN-1:0] Tag; logic FlushAdrFlag, FlushWayFlag;
logic [SETLEN-1:0] FlushAdr; logic [NUMWAYS-1:0] FlushWay, NextFlushWay;
logic [SETLEN-1:0] NextFlushAdr; logic FlushWayCntEn;
logic [SETLEN-1:0] FlushAdrP1; logic SelWriteback;
logic FlushAdrCntEn; logic LRUWriteEn;
logic FlushCntRst; logic SelFlush;
logic FlushAdrFlag; logic ResetOrFlushCntRst;
logic FlushWayFlag; logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
logic [NUMWAYS-1:0] FlushWay; logic SelFetchBuffer;
logic [NUMWAYS-1:0] NextFlushWay; logic CacheEn;
logic FlushWayCntEn; logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
logic SelWriteback; logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel;
logic LRUWriteEn;
logic SelFlush;
logic ResetOrFlushCntRst;
logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
logic SelFetchBuffer;
logic CacheEn;
localparam LOGLLENBYTES = $clog2(WORDLEN/8);
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);
logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel;
genvar index; genvar index;
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
@ -119,90 +107,99 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
// and FlushAdr when handling D$ flushes // and FlushAdr when handling D$ flushes
// The icache must update to the newest PCNextF on flush as it is probably a trap. Trap // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap
// sets PCNextF to XTVEC and the icache must start reading the instruction. // sets PCNextF to XTVEC and the icache must start reading the instruction.
mux3 #(SETLEN) AdrSelMux( assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))};
.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr), mux3 #(SETLEN) AdrSelMux(NextAdr[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
.s({SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}), .y(CAdr)); AdrSelMuxSel, CAdr);
// Array of cache ways, along with victim, hit, dirty, and read merging logic // Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0](
CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask, .clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
// Select victim way for associative caches
if(NUMWAYS > 1) begin:vict if(NUMWAYS > 1) begin:vict
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU( cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage), .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
.SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache); .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
end else assign VictimWay = 1'b1; // one hot. end else
assign CacheHit = | HitWay; assign VictimWay = 1'b1; // one hot.
assign LineDirty = | DirtyWay;
assign CacheHit = |HitWay;
assign LineDirty = |DirtyWay;
// ReadDataLineWay is a 2d array of cache line len by number of ways. // ReadDataLineWay is a 2d array of cache line len by number of ways.
// Need to OR together each way in a bitwise manner. // Need to OR together each way in a bitwise manner.
// Final part of the AO Mux. First is the AND in the cacheway. // Final part of the AO Mux. First is the AND in the cacheway.
or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache)); or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache));
or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag)); or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag));
// like to fix this. // Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines
if(DCACHE) if(DCACHE)
mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
.d1(BeatCount), .s(SelBusBeat), .d1(BeatCount), .s(SelBusBeat),
.y(WordOffsetAddr)); .y(WordOffsetAddr));
else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; else
assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];
// Bypass cache array to save a cycle when finishing a load miss
mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine); mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine);
// Select word from cache line
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread( subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread(
.PAdr(WordOffsetAddr), .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord);
.ReadDataLine, .ReadDataWord);
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
/////////////////////////////////////////////////////////////////////////////////////////////
onehotdecoder #(LOGCWPL) adrdec(
.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
for(index = 0; index < 2**LOGCWPL; index++) begin
assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
end
assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
for(index = 0; index < LINELEN/8; index++) begin
mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
.d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
end
// Bus address for fetch, writeback, or flush writeback
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
.s({SelFlush, SelWriteback}), .y(CacheBusAdr)); .s({SelFlush, SelWriteback}), .y(CacheBusAdr));
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Flush address and way generation during flush // Write Path
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Adjust byte mask from word to cache line
// The word-select field of PAdr (LOGCWPL bits sitting just above the LOGLLENBYTES byte-offset bits)
// is decoded into MemPAdrDecoded, one bit per word of the line.
// NOTE(review): presumably one-hot, going by the onehotdecoder module name; its definition is not in this file.
onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
// Each word of the line owns a WORDLEN/8-bit slice of DemuxedByteMask. The slice whose MemPAdrDecoded
// bit is set receives ByteMask; every other slice is zero.
for(index = 0; index < 2**LOGCWPL; index++) begin
assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
end
// Per-byte select for WriteDataMux below: all ones when SetValid & ~SetDirty, otherwise the complement of
// DemuxedByteMask, so only the bytes named by the mask take CacheWriteData.
// NOTE(review): assumes mux2 drives y from d1 when s is 1 -- confirm against the mux2 definition.
assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
// Line-wide byte mask handed to the cache ways: every byte when SetValid, only the demuxed bytes when
// SetDirty alone, and no bytes otherwise.
assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
// Merge write data into fetched cache line for store miss
// One 8-bit mux per byte of the line. The (8*index)%WORDLEN index wraps every WORDLEN bits, so the same
// CacheWriteData word is offered at every word position; FetchBufferByteSel decides, byte by byte,
// whether LineWriteData takes the CacheWriteData byte (d0) or the FetchBuffer byte (d1).
for(index = 0; index < LINELEN/8; index++) begin
mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
.d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
end
/////////////////////////////////////////////////////////////////////////////////////////////
// Flush logic
/////////////////////////////////////////////////////////////////////////////////////////////
// Flush address (line number)
assign ResetOrFlushCntRst = reset | FlushCntRst; assign ResetOrFlushCntRst = reset | FlushCntRst;
flopenr #(SETLEN) FlushAdrReg(.clk, .reset(ResetOrFlushCntRst), .en(FlushAdrCntEn), flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr);
.d(FlushAdrP1), .q(NextFlushAdr)); mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr);
assign FlushAdr = FlushAdrCntEn ? FlushAdrP1 : NextFlushAdr;
assign FlushAdrP1 = NextFlushAdr + 1'b1; assign FlushAdrP1 = NextFlushAdr + 1'b1;
assign FlushAdrFlag = (NextFlushAdr == FlushAdrThreshold[SETLEN-1:0]); assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]);
flopenl #(NUMWAYS) FlushWayReg(.clk, .load(ResetOrFlushCntRst), .en(FlushWayCntEn),
.val({{NUMWAYS-1{1'b0}}, 1'b1}), .d(NextFlushWay), .q(FlushWay)); // Flush way
assign FlushWayFlag = FlushWay[NUMWAYS-1]; flopenl #(NUMWAYS) FlushWayReg(clk, FlushWayCntEn, ResetOrFlushCntRst, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay);
if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]};
else assign NextFlushWay = FlushWay[NUMWAYS-1]; else assign NextFlushWay = FlushWay[NUMWAYS-1];
assign FlushWayFlag = FlushWay[NUMWAYS-1];
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Cache FSM // Cache FSM
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck,
.FlushStage, .CacheRW, .CacheAtomic, .Stall, .FlushStage, .CacheRW, .CacheAtomic, .Stall,
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
.CacheMiss, .CacheAccess, .SelAdr, .CacheMiss, .CacheAccess, .SelAdr,
.ClearValid, .ClearDirty, .SetDirty, .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush,
.SetValid, .SelWriteback, .SelFlush,
.FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
.FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
.InvalidateCache, .InvalidateCache, .CacheEn, .LRUWriteEn);
.CacheEn,
.LRUWriteEn);
endmodule endmodule