Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2023-01-09 00:18:11 -06:00
commit 816006ac1b

View File

@ -1,31 +1,27 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// cache (data cache) // cache
// //
// Written: ross1728@gmail.com July 07, 2021 // Written: ross1728@gmail.com July 07, 2021
// Implements the L1 data cache // Implements the L1 instruction/data cache
// //
// Purpose: Storage for data and meta data. // Purpose: Storage for data and meta data.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
// //
// MIT LICENSE // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
// //
// The above copyright notice and this permission notice shall be included in all copies or // Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// substantial portions of the Software. // except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, // https://solderpad.org/licenses/SHL-2.1/
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR //
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS // Unless required by applicable law or agreed to in writing, any work distributed under the
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // either express or implied. See the License for the specific language governing permissions
// OR OTHER DEALINGS IN THE SOFTWARE. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
@ -33,82 +29,74 @@
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) ( module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
// cpu side // cpu side
input logic FlushStage, input logic [1:0] CacheRW, // [1] Read, [0] Write
input logic Stall, input logic [1:0] CacheAtomic, // Atomic operation
input logic [1:0] CacheRW, input logic FlushCache, // Flush all dirty lines back to memory
input logic [1:0] CacheAtomic, input logic InvalidateCache, // Clear all valid bits
input logic FlushCache, input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits.
input logic InvalidateCache, input logic [`PA_BITS-1:0] PAdr, // Physical address
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only)
input logic [`PA_BITS-1:0] PAdr, // physical address input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only)
input logic [(WORDLEN-1)/8:0] ByteMask, output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted
input logic [WORDLEN-1:0] CacheWriteData, output logic CacheStall, // Cache stalls pipeline during multicycle operation
output logic CacheCommitted, output logic [WORDLEN-1:0] ReadDataWord, // Word read from cache (goes to CPU and bus)
output logic CacheStall,
// to performance counters to cpu // to performance counters to cpu
output logic CacheMiss, output logic CacheMiss, // Cache miss
output logic CacheAccess, output logic CacheAccess, // Cache access
// lsu control // lsu control
input logic SelHPTW, input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr
// Bus fsm interface // Bus fsm interface
output logic [1:0] CacheBusRW, input logic CacheBusAck, // Bus operation completed
input logic CacheBusAck, input logic SelBusBeat, // Word in cache line comes from BeatCount
input logic SelBusBeat, input logic [LOGBWPL-1:0] BeatCount, // Beat in burst
input logic [LOGBWPL-1:0] BeatCount, input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus
input logic [LINELEN-1:0] FetchBuffer, output logic [1:0] CacheBusRW, // [1] Read or [0] write bus
output logic [`PA_BITS-1:0] CacheBusAdr, output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access
output logic [WORDLEN-1:0] ReadDataWord); );
// Cache parameters // Cache parameters
localparam LINEBYTELEN = LINELEN/8; localparam LINEBYTELEN = LINELEN/8; // Line length in bytes
localparam OFFSETLEN = $clog2(LINEBYTELEN); localparam OFFSETLEN = $clog2(LINEBYTELEN); // Number of bits in offset field
localparam SETLEN = $clog2(NUMLINES); localparam SETLEN = $clog2(NUMLINES); // Number of set bits
localparam SETTOP = SETLEN+OFFSETLEN; localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits
localparam TAGLEN = `PA_BITS - SETTOP; localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits
localparam WORDSPERLINE = LINELEN/WORDLEN; localparam WORDSPERLINE = LINELEN/WORDLEN; // Number of words in cache line
localparam FlushAdrThreshold = NUMLINES - 1; localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete
localparam LOGLLENBYTES = $clog2(WORDLEN/8); // Number of bits to address a word
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; // *** see if this is the same as WORDSPERLINE
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); // ***
logic SelAdr; logic SelAdr;
logic [1:0] AdrSelMuxSel;
logic [SETLEN-1:0] CAdr; logic [SETLEN-1:0] CAdr;
logic [LINELEN-1:0] LineWriteData; logic [LINELEN-1:0] LineWriteData;
logic ClearValid; logic ClearValid, ClearDirty, SetDirty, SetValid;
logic ClearDirty;
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
logic [NUMWAYS-1:0] HitWay, ValidWay; logic [NUMWAYS-1:0] HitWay, ValidWay;
logic CacheHit; logic CacheHit;
logic SetDirty; logic [NUMWAYS-1:0] VictimWay, DirtyWay;
logic SetValid;
logic [NUMWAYS-1:0] VictimWay;
logic [NUMWAYS-1:0] DirtyWay;
logic LineDirty; logic LineDirty;
logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0];
logic [TAGLEN-1:0] Tag; logic [TAGLEN-1:0] Tag;
logic [SETLEN-1:0] FlushAdr; logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1;
logic [SETLEN-1:0] NextFlushAdr; logic FlushAdrCntEn, FlushCntRst;
logic [SETLEN-1:0] FlushAdrP1; logic FlushAdrFlag, FlushWayFlag;
logic FlushAdrCntEn; logic [NUMWAYS-1:0] FlushWay, NextFlushWay;
logic FlushCntRst;
logic FlushAdrFlag;
logic FlushWayFlag;
logic [NUMWAYS-1:0] FlushWay;
logic [NUMWAYS-1:0] NextFlushWay;
logic FlushWayCntEn; logic FlushWayCntEn;
logic SelWriteback; logic SelWriteback;
logic LRUWriteEn; logic LRUWriteEn;
logic SelFlush; logic SelFlush;
logic ResetOrFlushCntRst; logic ResetOrFlushCntRst;
logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
logic SelFetchBuffer; logic SelFetchBuffer;
logic CacheEn; logic CacheEn;
localparam LOGLLENBYTES = $clog2(WORDLEN/8);
localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN;
localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);
logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded;
logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel; logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
genvar index; genvar index;
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
@ -119,90 +107,99 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
// and FlushAdr when handling D$ flushes // and FlushAdr when handling D$ flushes
// The icache must update to the newest PCNextF on flush as it is probably a trap. Trap // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap
// sets PCNextF to XTVEC and the icache must start reading the instruction. // sets PCNextF to XTVEC and the icache must start reading the instruction.
mux3 #(SETLEN) AdrSelMux( assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))};
.d0(NextAdr[SETTOP-1:OFFSETLEN]), .d1(PAdr[SETTOP-1:OFFSETLEN]), .d2(FlushAdr), mux3 #(SETLEN) AdrSelMux(NextAdr[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
.s({SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))}), .y(CAdr)); AdrSelMuxSel, CAdr);
// Array of cache ways, along with victim, hit, dirty, and read merging logic // Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0](
CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask, .clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
// Select victim way for associative caches
if(NUMWAYS > 1) begin:vict if(NUMWAYS > 1) begin:vict
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU( cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage), .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
.SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache); .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
end else assign VictimWay = 1'b1; // one hot. end else
assign CacheHit = | HitWay; assign VictimWay = 1'b1; // one hot.
assign LineDirty = | DirtyWay;
assign CacheHit = |HitWay;
assign LineDirty = |DirtyWay;
// ReadDataLineWay is a 2d array of cache line len by number of ways. // ReadDataLineWay is a 2d array of cache line len by number of ways.
// Need to OR together each way in a bitwise manner. // Need to OR together each way in a bitwise manner.
// Final part of the AO Mux. First is the AND in the cacheway. // Final part of the AO Mux. First is the AND in the cacheway.
or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache)); or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineCache));
or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag)); or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag));
// like to fix this. // Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines
if(DCACHE) if(DCACHE)
mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]), mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
.d1(BeatCount), .s(SelBusBeat), .d1(BeatCount), .s(SelBusBeat),
.y(WordOffsetAddr)); .y(WordOffsetAddr));
else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]; else
assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];
// Bypass cache array to save a cycle when finishing a load miss
mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine); mux2 #(LINELEN) EarlyReturnMux(ReadDataLineCache, FetchBuffer, SelFetchBuffer, ReadDataLine);
// Select word from cache line
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread( subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL) subcachelineread(
.PAdr(WordOffsetAddr), .PAdr(WordOffsetAddr), .ReadDataLine, .ReadDataWord);
.ReadDataLine, .ReadDataWord);
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
/////////////////////////////////////////////////////////////////////////////////////////////
onehotdecoder #(LOGCWPL) adrdec(
.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
for(index = 0; index < 2**LOGCWPL; index++) begin
assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
end
assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
for(index = 0; index < LINELEN/8; index++) begin
mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
.d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
end
// Bus address for fetch, writeback, or flush writeback
mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
.d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}),
.s({SelFlush, SelWriteback}), .y(CacheBusAdr)); .s({SelFlush, SelWriteback}), .y(CacheBusAdr));
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Flush address and way generation during flush // Write Path
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Adjust byte mask from word to cache line
onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
for(index = 0; index < 2**LOGCWPL; index++) begin
assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
end
assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1.
assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
// Merge write data into fetched cache line for store miss
for(index = 0; index < LINELEN/8; index++) begin
mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
.d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
end
/////////////////////////////////////////////////////////////////////////////////////////////
// Flush logic
/////////////////////////////////////////////////////////////////////////////////////////////
// Flush address (line number)
assign ResetOrFlushCntRst = reset | FlushCntRst; assign ResetOrFlushCntRst = reset | FlushCntRst;
flopenr #(SETLEN) FlushAdrReg(.clk, .reset(ResetOrFlushCntRst), .en(FlushAdrCntEn), flopenr #(SETLEN) FlushAdrReg(clk, ResetOrFlushCntRst, FlushAdrCntEn, FlushAdrP1, NextFlushAdr);
.d(FlushAdrP1), .q(NextFlushAdr)); mux2 #(SETLEN) FlushAdrMux(NextFlushAdr, FlushAdrP1, FlushAdrCntEn, FlushAdr);
assign FlushAdr = FlushAdrCntEn ? FlushAdrP1 : NextFlushAdr;
assign FlushAdrP1 = NextFlushAdr + 1'b1; assign FlushAdrP1 = NextFlushAdr + 1'b1;
assign FlushAdrFlag = (NextFlushAdr == FlushAdrThreshold[SETLEN-1:0]); assign FlushAdrFlag = (NextFlushAdr == FLUSHADRTHRESHOLD[SETLEN-1:0]);
flopenl #(NUMWAYS) FlushWayReg(.clk, .load(ResetOrFlushCntRst), .en(FlushWayCntEn),
.val({{NUMWAYS-1{1'b0}}, 1'b1}), .d(NextFlushWay), .q(FlushWay)); // Flush way
assign FlushWayFlag = FlushWay[NUMWAYS-1]; flopenl #(NUMWAYS) FlushWayReg(clk, FlushWayCntEn, ResetOrFlushCntRst, {{NUMWAYS-1{1'b0}}, 1'b1}, NextFlushWay, FlushWay);
if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]};
else assign NextFlushWay = FlushWay[NUMWAYS-1]; else assign NextFlushWay = FlushWay[NUMWAYS-1];
assign FlushWayFlag = FlushWay[NUMWAYS-1];
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Cache FSM // Cache FSM
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck,
.FlushStage, .CacheRW, .CacheAtomic, .Stall, .FlushStage, .CacheRW, .CacheAtomic, .Stall,
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted,
.CacheMiss, .CacheAccess, .SelAdr, .CacheMiss, .CacheAccess, .SelAdr,
.ClearValid, .ClearDirty, .SetDirty, .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush,
.SetValid, .SelWriteback, .SelFlush,
.FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
.FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,
.InvalidateCache, .InvalidateCache, .CacheEn, .LRUWriteEn);
.CacheEn,
.LRUWriteEn);
endmodule endmodule