On the way to solving the store delay hazard.

This commit is contained in:
Rose Thompson 2023-12-13 10:39:01 -06:00
parent c6ed08ce12
commit 13bb5d845b
9 changed files with 53 additions and 30 deletions

25
src/cache/cache.sv vendored
View File

@ -34,6 +34,7 @@ module cache import cvw::*; #(parameter cvw_t P,
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
// cpu side
input logic [1:0] CacheRWNext, // [1] Read, [0] Write
input logic [1:0] CacheRW, // [1] Read, [0] Write
input logic FlushCache, // Flush all dirty lines back to memory
input logic InvalidateCache, // Clear all valid bits
@ -71,9 +72,12 @@ module cache import cvw::*; #(parameter cvw_t P,
localparam LOGLLENBYTES = $clog2(WORDLEN/8); // Number of bits to address a word
logic SelAdr;
logic [1:0] AdrSelMuxSel;
logic [SETLEN-1:0] CacheSet;
logic SelAdrData;
logic SelAdrTag;
logic [1:0] AdrSelMuxSelData;
logic [1:0] AdrSelMuxSelTag;
logic [SETLEN-1:0] CacheSetData;
logic [SETLEN-1:0] CacheSetTag;
logic [LINELEN-1:0] LineWriteData;
logic ClearDirty, SetDirty, SetValid, ClearValid;
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
@ -108,20 +112,23 @@ module cache import cvw::*; #(parameter cvw_t P,
// and FlushAdr when handling D$ flushes
// The icache must update to the newest PCNextF on flush as it is probably a trap. Trap
// sets PCNextF to XTVEC and the icache must start reading the instruction.
assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))};
mux3 #(SETLEN) AdrSelMux(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
AdrSelMuxSel, CacheSet);
assign AdrSelMuxSelData = {SelFlush, ((SelAdrData | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))};
mux3 #(SETLEN) AdrSelMuxData(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
AdrSelMuxSelData, CacheSetData);
assign AdrSelMuxSelTag = {SelFlush, ((SelAdrTag | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))};
mux3 #(SETLEN) AdrSelMuxTag(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
AdrSelMuxSelTag, CacheSetTag);
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
.clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, .SelWay,
.clk, .reset, .CacheEn, .CacheSetData, .CacheSetTag, .PAdr, .LineWriteData, .LineByteMask, .SelWay,
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .HitDirtyWay, .TagWay, .FlushStage, .InvalidateCache);
// Select victim way for associative caches
if(NUMWAYS > 1) begin:vict
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
.clk, .reset, .FlushStage, .CacheEn, .HitWay, .ValidWay, .VictimWay, .CacheSet, .LRUWriteEn,
.clk, .reset, .FlushStage, .CacheEn, .HitWay, .ValidWay, .VictimWay, .CacheSetData, .LRUWriteEn,
.SetValid, .ClearValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache);
end else
assign VictimWay = 1'b1; // one hot.
@ -220,7 +227,7 @@ module cache import cvw::*; #(parameter cvw_t P,
cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck,
.FlushStage, .CacheRW, .Stall,
.CacheHit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted,
.CacheMiss, .CacheAccess, .SelAdr, .SelWay,
.CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelWay,
.ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .SelFlush,
.FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst,
.FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer,

View File

@ -35,7 +35,7 @@ module cacheLRU
input logic CacheEn, // Enable the cache memory arrays. When disabled, read data is held constant
input logic [NUMWAYS-1:0] HitWay, // Which way is valid and matches PAdr's tag
input logic [NUMWAYS-1:0] ValidWay, // Which ways for a particular set are valid, ignores tag
input logic [SETLEN-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [SETLEN-1:0] CacheSetData, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [SETLEN-1:0] PAdr, // Physical address
input logic LRUWriteEn, // Update the LRU state
input logic SetValid, // Set the valid bit in the selected way and set
@ -139,7 +139,7 @@ module cacheLRU
// LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice.
// This is a two port memory.
// Every cycle must read from CacheSet and each load/store must write the new LRU.
// Every cycle must read from CacheSetData and each load/store must write the new LRU.
always_ff @(posedge clk) begin
if (reset | (InvalidateCache & ~FlushStage)) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
if(CacheEn) begin
@ -147,10 +147,10 @@ module cacheLRU
LRUMemory[PAdr] <= '0;
else if(LRUWriteEn)
LRUMemory[PAdr] <= NextLRU;
if(LRUWriteEn & (PAdr == CacheSet))
if(LRUWriteEn & (PAdr == CacheSetData))
CurrLRU <= #1 NextLRU;
else
CurrLRU <= #1 LRUMemory[CacheSet];
CurrLRU <= #1 LRUMemory[CacheSetData];
end
end

13
src/cache/cachefsm.sv vendored
View File

@ -54,7 +54,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
input logic HitLineDirty, // The cache hit way is dirty
input logic FlushAdrFlag, // On last set of a cache flush
input logic FlushWayFlag, // On the last way for any set of a cache flush
output logic SelAdr, // [0] SRAM reads from NextAdr, [1] SRAM reads from PAdr
output logic SelAdrData, // Select for CacheSetData (data SRAM, valid and dirty bits): [0] reads from NextAdr, [1] reads from PAdr
output logic SelAdrTag, // Select for CacheSetTag (tag SRAM): [0] reads from NextAdr, [1] reads from PAdr
output logic SetValid, // Set the valid bit in the selected way and set
output logic ClearValid, // Clear the valid bit in the selected way and set
output logic SetDirty, // Set the dirty bit in the selected way and set
@ -172,9 +173,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
(CurrState == STATE_WRITE_LINE);
assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2] | ~CacheBusAck)) |
(CurrState == STATE_READY & AnyMiss & LineDirty);
/* -----\/----- EXCLUDED -----\/-----
assign SelFlush = (CurrState == STATE_READY & FlushCache) |
(CurrState == STATE_FLUSH) |
(CurrState == STATE_FLUSH_WRITEBACK);
-----/\----- EXCLUDED -----/\----- */
assign SelFlush = FlushCache;
// coverage off -item e 1 -fecexprrow 1
// (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck)
assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) |
@ -193,7 +197,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P,
(CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) |
(CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & ~CacheBusAck);
assign SelAdr = (CurrState == STATE_READY & (CacheRW[0] | AnyMiss | (|CMOp))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed
assign SelAdrData = (CurrState == STATE_READY & (CacheRW[0] | AnyMiss | (|CMOp))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed
(CurrState == STATE_FETCH) |
(CurrState == STATE_WRITEBACK) |
(CurrState == STATE_WRITE_LINE) |
resetDelay;
assign SelAdrTag = (CurrState == STATE_READY & (AnyMiss | (|CMOp))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed
(CurrState == STATE_FETCH) |
(CurrState == STATE_WRITEBACK) |
(CurrState == STATE_WRITE_LINE) |

21
src/cache/cacheway.sv vendored
View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// cacheway
//
// Written: Ross Thompson ross1728@gmail.com
// Written: Rose Thompson ross1728@gmail.com
// Created: 7 July 2021
// Modified: 20 January 2023
//
@ -34,7 +34,8 @@ module cacheway import cvw::*; #(parameter cvw_t P,
input logic reset,
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
input logic CacheEn, // Enable the cache memory arrays. When disabled, read data is held constant
input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
input logic [$clog2(NUMLINES)-1:0] CacheSetData, // Set index for the data SRAM and the valid/dirty bits: output of the data address select mux (NextAdr, PAdr, or FlushAdr)
input logic [$clog2(NUMLINES)-1:0] CacheSetTag, // Set index for the tag SRAM: output of the tag address select mux (NextAdr, PAdr, or FlushAdr)
input logic [PA_BITS-1:0] PAdr, // Physical address
input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only)
input logic SetValid, // Set the valid bit in the selected way and set
@ -113,7 +114,7 @@ module cacheway import cvw::*; #(parameter cvw_t P,
/////////////////////////////////////////////////////////////////////////////////////////////
ram1p1rwe #(.USE_SRAM(P.USE_SRAM), .DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
.addr(CacheSet), .dout(ReadTag),
.addr(CacheSetTag), .dout(ReadTag),
.din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
// AND portion of distributed tag multiplexer
@ -135,12 +136,12 @@ module cacheway import cvw::*; #(parameter cvw_t P,
for(words = 0; words < NUMSRAM; words++) begin: word
if (!READ_ONLY_CACHE) begin:wordram
ram1p1rwbe #(.USE_SRAM(P.USE_SRAM), .DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet),
ram1p1rwbe #(.USE_SRAM(P.USE_SRAM), .DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSetData),
.dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
end else begin:wordram // no byte-enable needed for i$.
ram1p1rwe #(.USE_SRAM(P.USE_SRAM), .DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet),
ram1p1rwe #(.USE_SRAM(P.USE_SRAM), .DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSetData),
.dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
.we(SelectedWriteWordEn));
@ -157,10 +158,10 @@ module cacheway import cvw::*; #(parameter cvw_t P,
always_ff @(posedge clk) begin // Valid bit array,
if (reset) ValidBits <= #1 '0;
if(CacheEn) begin
ValidWay <= #1 ValidBits[CacheSet];
ValidWay <= #1 ValidBits[CacheSetData];
if(InvalidateCache) ValidBits <= #1 '0; // exclusion-tag: dcache invalidateway
else if (SetValidEN) ValidBits[CacheSet] <= #1 SetValidWay;
else if (ClearValidEN) ValidBits[CacheSet] <= #1 '0;
else if (SetValidEN) ValidBits[CacheSetData] <= #1 SetValidWay;
else if (ClearValidEN) ValidBits[CacheSetData] <= #1 '0;
end
end
@ -174,8 +175,8 @@ module cacheway import cvw::*; #(parameter cvw_t P,
// reset is optional. Consider merging with TAG array in the future.
//if (reset) DirtyBits <= #1 {NUMLINES{1'b0}};
if(CacheEn) begin
Dirty <= #1 DirtyBits[CacheSet];
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CacheSet] <= #1 SetDirtyWay;
Dirty <= #1 DirtyBits[CacheSetData];
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CacheSetData] <= #1 SetDirtyWay;
end
end
end else assign Dirty = 1'b0;

View File

@ -68,6 +68,7 @@ module controller import cvw::*; #(parameter cvw_t P) (
// Memory stage control signals
input logic StallM, FlushM, // Stall, flush Memory stage
output logic [1:0] MemRWE, // Mem read/write: MemRWE[1] = 1 for read, MemRWE[0] = 1 for write
output logic [1:0] MemRWM, // Mem read/write: MemRWM[1] = 1 for read, MemRWM[0] = 1 for write
output logic CSRReadM, CSRWriteM, PrivilegedM, // CSR read, write, or privileged instruction
output logic [1:0] AtomicM, // Atomic (AMO) instruction
@ -97,7 +98,7 @@ module controller import cvw::*; #(parameter cvw_t P) (
logic RegWriteD, RegWriteE; // RegWrite (register will be written)
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file
logic [2:0] PreImmSrcD; // Immediate source format (before amending for prefetches)
logic [1:0] MemRWD, MemRWE; // Store (write to memory)
logic [1:0] MemRWD; // Mem read/write control: [1] = 1 for read, [0] = 1 for write
logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3)
logic BaseW64D; // W64 for Base instructions specifically
logic BaseRegWriteD; // Indicates if Base instruction register write instruction

View File

@ -50,6 +50,7 @@ module ieu import cvw::*; #(parameter cvw_t P) (
output logic LSUPrefetchM, // data prefetch
// Memory stage signals
input logic SquashSCW, // Squash store conditional, from LSU
output logic [1:0] MemRWE, // Read/write control goes to LSU
output logic [1:0] MemRWM, // Read/write control goes to LSU
output logic [1:0] AtomicM, // Atomic control goes to LSU
output logic [P.XLEN-1:0] WriteDataM, // Write data to LSU
@ -107,7 +108,7 @@ module ieu import cvw::*; #(parameter cvw_t P) (
.PCSrcE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .ALUSelectE, .MemReadE, .CSRReadE,
.Funct3E, .IntDivE, .MDUE, .W64E, .SubArithE, .BranchD, .BranchE, .JumpD, .JumpE, .SCE,
.BranchSignedE, .BSelectE, .ZBBSelectE, .BALUControlE, .BMUActiveE, .MDUActiveE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM,
.StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.StallM, .FlushM, .MemRWE, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .InvalidateICacheM, .StoreStallD);

View File

@ -243,7 +243,7 @@ module ifu import cvw::*; #(parameter cvw_t P) (
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
.ByteMask('0), .BeatCount('0), .SelBusBeat('0),
.CacheWriteData('0),
.CacheRW(CacheRWF),
.CacheRW(CacheRWF), .CacheRWNext('0), // CacheRWNext is only used to detect hazards. Not possible with icache
.FlushCache('0),
.NextSet(PCSpillNextF[11:0]),
.PAdr(PCPF),

View File

@ -34,6 +34,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
input logic StallM, FlushM, StallW, FlushW,
output logic LSUStallM, // LSU stalls pipeline during a multicycle operation
// connected to cpu (controls)
input logic [1:0] MemRWE, // Read/Write control
input logic [1:0] MemRWM, // Read/Write control
input logic [2:0] Funct3M, // Size of memory operation
input logic [6:0] Funct7M, // Atomic memory operation function
@ -315,7 +316,8 @@ module lsu import cvw::*; #(parameter cvw_t P) (
cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache(
.clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(SelStoreDelay ? 2'b00 : CacheRWM),
.clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRWNext(MemRWE), // *** change to LSURWE after updating hptw and atomic
.CacheRW(SelStoreDelay ? 2'b00 : CacheRWM),
.FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM),
.ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]),
.CacheWriteData(LSUWriteDataSpillM), .SelHPTW,

View File

@ -64,6 +64,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
logic [P.XLEN-1:0] PCM;
logic [P.XLEN-1:0] CSRReadValW, MDUResultW;
logic [P.XLEN-1:0] UnalignedPCNextF, PC2NextF;
logic [1:0] MemRWE;
logic [1:0] MemRWM;
logic InstrValidD, InstrValidE, InstrValidM;
logic InstrMisalignedFaultM;
@ -198,6 +199,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, .MDUActiveE, .CMOpM, .IFUPrefetchE, .LSUPrefetchM,
// Memory stage interface
.SquashSCW, // from LSU
.MemRWE, // read/write control goes to LSU
.MemRWM, // read/write control goes to LSU
.AtomicM, // atomic control goes to LSU
.WriteDataM, // Write data to LSU
@ -216,7 +218,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
lsu #(P) lsu(
.clk, .reset, .StallM, .FlushM, .StallW, .FlushW,
// CPU interface
.MemRWM, .Funct3M, .Funct7M(InstrM[31:25]), .AtomicM,
.MemRWE, .MemRWM, .Funct3M, .Funct7M(InstrM[31:25]), .AtomicM,
.CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW,
.FpLoadStoreM, .FWriteDataM, .IEUAdrE, .IEUAdrM, .WriteDataM,
.ReadDataW, .FlushDCacheM, .CMOpM, .LSUPrefetchM,