This d cache fsm is getting complex.

This commit is contained in:
Ross Thompson 2021-07-08 15:26:16 -05:00
parent 1fe06bc670
commit 910ddb83ae
2 changed files with 298 additions and 90 deletions

View File

@ -413,6 +413,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256)
assign NextFetchCount = FetchCount + 1'b1; assign NextFetchCount = FetchCount + 1'b1;
// This part is confusing. // This part is confusing.
// *** Ross Thompson reduce the complexity. This is just dumb.
// we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide
// we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros. // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros.
// fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with

View File

@ -5,125 +5,130 @@
// Implements the L1 data cache // Implements the L1 data cache
// //
// Purpose: Storage for data and meta data. // Purpose: Storage for data and meta data.
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions: // is furnished to do so, subject to the following conditions:
// //
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/////////////////////////////////////////// ///////////////////////////////////////////
`include "wally-config.vh" `include "wally-config.vh"
module dcache module dcache
(input logic clk, (input logic clk,
input logic reset, input logic reset,
input logic StallM, input logic StallM,
input logic StallW, input logic StallW,
input logic FlushM, input logic FlushM,
input logic FlushW, input logic FlushW,
// cpu side // cpu side
input logic [1:0] MemRWM, input logic [1:0] MemRWM,
input logic [2:0] Funct3M, input logic [2:0] Funct3M,
input logic [1:0] AtomicM, input logic [1:0] AtomicM,
input logic [`PA_BITS-1:0] MemAdrE, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] MemAdrE, // virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] MemPAdrM, // physical address input logic [`PA_BITS-1:0] MemPAdrM, // physical address
input logic [`XLEN-1:0] WriteDataM, input logic [`XLEN-1:0] WriteDataM,
output logic [`XLEN-1:0] ReadDataW, output logic [`XLEN-1:0] ReadDataW,
output logic DCacheStall, output logic DCacheStall,
// inputs from TLB and PMA/P // inputs from TLB and PMA/P
input logic FaultM, input logic FaultM,
input logic DTLBMissM, input logic DTLBMissM,
input logic UncachedM,
// ahb side // ahb side
output logic [`PA_BITS-1:0] AHBPAdr, // to ahb output logic [`PA_BITS-1:0] AHBPAdr, // to ahb
output logic AHBRead, output logic AHBRead,
output logic AHBWrite, output logic AHBWrite,
input logic AHBAck, // from ahb input logic AHBAck, // from ahb
input logic [`XLEN-1:0] HRDATA, // from ahb input logic [`XLEN-1:0] HRDATA, // from ahb
output logic [`XLEN-1:0] HWDATA, // to ahb output logic [`XLEN-1:0] HWDATA, // to ahb
output logic [2:0] AHBSize output logic [2:0] AHBSize
); );
localparam integer BLOCKLEN = 256; localparam integer BLOCKLEN = 256;
localparam integer NUMLINES = 512; localparam integer NUMLINES = 512;
localparam integer NUMWAYS = 4; localparam integer NUMWAYS = 4;
localparam integer NUMREPL_BITS = 3; localparam integer NUMREPL_BITS = 3;
localparam integer BLOCKBYTELEN = BLOCKLEN/8; localparam integer BLOCKBYTELEN = BLOCKLEN/8;
localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN);
localparam integer INDEXLEN = $clog2(NUMLINES); localparam integer INDEXLEN = $clog2(NUMLINES);
localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN; localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN;
localparam integer WORDSPERLINE = BLOCKLEN/`XLEN; localparam integer WORDSPERLINE = BLOCKLEN/`XLEN;
localparam integer LOGWPL = $clog2(WORDSPERLINE);
logic [1:0] AdrSel; logic SelAdrM;
logic [`PA_BITS-1:0] MemPAdrW; logic [`PA_BITS-1:0] MemPAdrW;
logic [INDEXLEN-1:0] SRAMAdr; logic [INDEXLEN-1:0] SRAMAdr;
logic [NUMWAYS-1:0] WriteEnable; logic [NUMWAYS-1:0] WriteEnable;
logic [NUMWAYS-1:0] WriteWordEnable; logic [NUMWAYS-1:0] WriteWordEnable;
logic [BLOCKLEN-1:0] SRAMWriteData; logic [BLOCKLEN-1:0] SRAMWriteData;
logic [TAGLEN-1:0] WriteTag; logic SetValidM, ClearValidM, SetValidW, ClearValidW;
logic SetValid, ClearValid; logic SetDirtyM, ClearDirtyM, SetDirtyW, ClearDirtyW;
logic SetDirty, ClearDirty; logic [BLOCKLEN-1:0] ReadDataM, ReadDataMaskedM [NUMWAYS-1:0];
logic [BLOCKLEN-1:0] ReadDataM, ReadDataMaskedM [NUMWAYS-1:0]; logic [TAGLEN-1:0] TagData [NUMWAYS-1:0];
logic [TAGLEN-1:0] TagData [NUMWAYS-1:0]; logic [NUMWAYS-1:0] Valid, Dirty, WayHit;
logic [NUMWAYS-1:0] Valid, Dirty, WayHit; logic CacheHit;
logic Hit;
logic [NUMREPL_BITS-1:0] ReplacementBits, NewReplacement; logic [NUMREPL_BITS-1:0] ReplacementBits, NewReplacement;
logic [BLOCKLEN-1:0] ReadDataSelectWayM; logic [BLOCKLEN-1:0] ReadDataSelectWayM;
logic [`XLEN-1:0] ReadDataSelectWayXLEN [(WORDSPERLINE)-1:0]; logic [`XLEN-1:0] ReadDataSelectWayXLEN [(WORDSPERLINE)-1:0];
logic [`XLEN-1:0] WordReadDataM, FinalReadDataM; logic [`XLEN-1:0] WordReadDataM, FinalReadDataM;
logic [`XLEN-1:0] WriteDataW, FinalWriteDataW; logic [`XLEN-1:0] WriteDataW, FinalWriteDataW, FinalAMOWriteDataW;
logic [BLOCKLEN-1:0] FinalWriteDataWordsW; logic [BLOCKLEN-1:0] FinalWriteDataWordsW;
logic [LOGWPL:0] FetchCount, NextFetchCount;
logic [NUMWAYS-1:0] SRAMWordWriteEnableM, SRAMWordWriteEnableW;
logic [WORDSPERLINE-1:0] SRAMWordEnable [NUMWAYS-1:0];
logic SelMemWriteDataM, SelMemWriteDataW;
logic [2:0] Funct3W;
logic SRAMWordWriteEnableM, SRAMWordWriteEnableW;
logic SRAMBlockWriteEnableM;
logic SRAMWriteEnable;
logic SaveSRAMRead;
logic [1:0] AtomicW;
typedef enum {STATE_READY,
STATE_MISS_FETCH_WDV,
STATE_MISS_FETCH_DONE,
STATE_MISS_WRITE_BACK,
STATE_MISS_READ_SRAM,
STATE_AMO_MISS_FETCH_WDV,
STATE_AMO_MISS_FETCH_DONE,
STATE_AMO_MISS_WRITE_BACK,
STATE_AMO_MISS_READ_SRAM,
STATE_AMO_MISS_UPDATE,
STATE_AMO_MISS_WRITE,
STATE_AMO_UPDATE,
STATE_AMO_WRITE,
STATE_SRAM_BUSY,
STATE_PTW_READY,
STATE_PTW_FETCH,
STATE_UNCACHED} statetype;
statetype CurrState, NextState;
// data path
flopen #(`PA_BITS) MemPAdrWReg(.clk(clk), flopen #(`PA_BITS) MemPAdrWReg(.clk(clk),
.en(~StallW), .en(~StallW),
.d(MemPAdrM), .d(MemPAdrM),
.q(MemPAdrW)); .q(MemPAdrW));
mux3 #(INDEXLEN) mux2 #(INDEXLEN)
AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSET-1:OFFSET]), AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSET-1:OFFSET]),
.d1(MemPAdrM[INDEXLEN+OFFSET-1:OFFSET]), .d1(MemPAdrM[INDEXLEN+OFFSET-1:OFFSET]),
.d2(MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]), .s(SelAdrM),
.s(AdrSel), .y(AdrMuxOut));
.y(SRAMAdr));
genvar way;
mux2 #(INDEXLEN)
SelAdrlMux2(.d0(AdrMuxOut),
.d1(MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]),
.s(SRAMWordWriteEnableW),
.y(SRAMAdr));
genvar way;
generate generate
for(way = 0; way < NUMWAYS; way = way + 1) begin for(way = 0; way < NUMWAYS; way = way + 1) begin
DCacheMem #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN)) DCacheMem #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN))
@ -134,11 +139,11 @@ module dcache
.WriteEnable(SRAMWriteEnable[way]), .WriteEnable(SRAMWriteEnable[way]),
.WriteWordEnable(SRAMWordEnable[way]), .WriteWordEnable(SRAMWordEnable[way]),
.WriteData(SRAMWriteData), .WriteData(SRAMWriteData),
.WriteTag(WriteTag), .WriteTag(MemPAdrW[`PA_BITS-1:OFFSET+INDEXLEN]),
.SetValid(SetValid), .SetValid(SetValidW),
.ClearValid(ClearValid), .ClearValid(ClearValidW),
.SetDirty(SetDirty), .SetDirty(SetDirtyW),
.ClearDirty(ClearDirty), .ClearDirty(ClearDirtyW),
.ReadData(ReadDataM[way]), .ReadData(ReadDataM[way]),
.ReadTag(ReadTag[way]), .ReadTag(ReadTag[way]),
.Valid(Valid[way]), .Valid(Valid[way]),
@ -150,10 +155,13 @@ module dcache
always_ff @(posedge clk, posedge reset) begin always_ff @(posedge clk, posedge reset) begin
if (reset) ReplacementBits <= '0; if (reset) ReplacementBits <= '0;
else if (WriteEnable) ReplacementBits[MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]] <= NewReplacement; else if (SRAMWriteEnable) ReplacementBits[MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]] <= NewReplacement;
end end
assign Hit = |WayHit; // *** TODO add replacement policy
assign NewReplacement = '0;
assign CacheHit = |WayHit;
assign ReadDataSelectWayM = |ReadDataMaskedM; // second part of AO mux. assign ReadDataSelectWayM = |ReadDataMaskedM; // second part of AO mux.
// Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can
@ -183,13 +191,29 @@ module dcache
.en(~StallW), .en(~StallW),
.d(WriteDataM), .d(WriteDataM),
.q(WriteDataW)); .q(WriteDataW));
flopr #(3) Funct3WReg(.clk(clk),
.reset(reset),
.d(Funct3M),
.q(Funct3W));
subwordwrite subwordwrite(.HRDATA(ReadDataW), subwordwrite subwordwrite(.HRDATA(ReadDataW),
.HADDRD(MemPAdrM[`XLEN/8-1:0]), .HADDRD(MemPAdrM[`XLEN/8-1:0]),
.HSIZED(Funct3W), .HSIZED(Funct3W),
.HWDATAIN(WriteDataW), .HWDATAIN(WriteDataW),
.HWDATA(FinalWriteDataW)); .HWDATA(FinalWriteDataW));
// Atomic memory operation (AMO) write-data path.
// When `A_SUPPORTED is set, amoalu is given ReadDataW and WriteDataW as its two sources
// and produces AMOResult; wdmux then forwards AMOResult instead of FinalWriteDataW
// whenever (SelAMOWrite & AtomicW[1]) is high. Without `A_SUPPORTED the AMO path is
// bypassed and FinalAMOWriteDataW is simply FinalWriteDataW.
// NOTE(review): Funct7W and SelAMOWrite are not declared in the visible part of this
// module -- confirm they are declared elsewhere before relying on this block.
generate
if (`A_SUPPORTED) begin
logic [`XLEN-1:0] AMOResult;
amoalu amoalu(.srca(ReadDataW), .srcb(WriteDataW), .funct(Funct7W), .width(Funct3W),
.result(AMOResult));
// select the AMO result only while the FSM requests the AMO write-back
mux2 #(`XLEN) wdmux(FinalWriteDataW, AMOResult, SelAMOWrite & AtomicW[1], FinalAMOWriteDataW);
end else
assign FinalAMOWriteDataW = FinalWriteDataW;
endgenerate
// register the fetch data from the next level of memory. // register the fetch data from the next level of memory.
generate generate
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
@ -199,23 +223,206 @@ module dcache
.q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); .q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN]));
end end
endgenerate endgenerate
// AHB request address and size.
//
// The fetch counter (FetchCountReg / NextFetchCount) is intentionally NOT instantiated
// here: the same instance and assignment appear in the control path section further
// down, after CntEn and CntReset are declared. Keeping a second copy here produced a
// duplicate instance name and two drivers on FetchCount / NextFetchCount.
//
// FetchCount counts XLEN-wide words, so it is converted to a byte offset by shifting by
// log2(bytes per word). The counter is zero-extended to `PA_BITS first so the shift
// cannot drop upper bits.
// NOTE(review): MemPAdrM is added without masking its block-offset bits; this presumably
// expects the fetch base to be block aligned -- confirm.
assign AHBPAdr = ({{(`PA_BITS-LOGWPL-1){1'b0}}, FetchCount} << $clog2(`XLEN/8)) + MemPAdrM;
// remove later
assign AHBSize = 3'b000;
// mux between the CPU's write and the cache fetch. // mux between the CPU's write and the cache fetch.
generate generate
for(index = 0; index < WORDSPERLINE; index++) begin for(index = 0; index < WORDSPERLINE; index++) begin
assign FinalWriteDataWordsW[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalWriteDataW; assign FinalWriteDataWordsW[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalAMOWriteDataW;
end end
endgenerate endgenerate
mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsW), mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsW),
.d1(DCacheMemWriteData), .d1(DCacheMemWriteData),
.s(SelMemWriteData), .s(SRAMBlockWriteEnableM),
.y(SRAMWriteData)); .y(SRAMWriteData));
// control path *** eventually move to own module.
logic AnyCPUReqM;
logic FetchCountFlag;
logic PreCntEn;
logic CntEn;
logic CntReset;
typedef enum {STATE_READY,
STATE_READ_MISS_FETCH_WDV,
STATE_READ_MISS_FETCH_DONE,
STATE_READ_MISS_CHECK_EVICTED_DIRTY,
STATE_READ_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_READ_MISS_WRITE_CACHE_BLOCK,
STATE_READ_MISS_READ_WORD,
STATE_WRITE_MISS_FETCH_WDV,
STATE_WRITE_MISS_FETCH_DONE,
STATE_WRITE_MISS_CHECK_EVICTED_DIRTY,
STATE_WRITE_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_WRITE_MISS_WRITE_CACHE_BLOCK,
STATE_WRITE_MISS_WRITE_WORD,
STATE_AMO_MISS_FETCH_WDV,
STATE_AMO_MISS_FETCH_DONE,
STATE_AMO_MISS_CHECK_EVICTED_DIRTY,
STATE_AMO_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_AMO_MISS_WRITE_CACHE_BLOCK,
STATE_AMO_MISS_READ_WORD,
STATE_AMO_MISS_UPDATE_WORD,
STATE_AMO_MISS_WRITE_WORD,
STATE_AMO_UPDATE,
STATE_AMO_WRITE,
STATE_SRAM_BUSY,
STATE_PTW_READY,
STATE_PTW_MISS_FETCH_WDV,
STATE_PTW_MISS_FETCH_DONE,
STATE_PTW_MISS_CHECK_EVICTED_DIRTY,
STATE_PTW_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_PTW_MISS_WRITE_CACHE_BLOCK,
STATE_PTW_MISS_READ_SRAM,
STATE_UNCACHED_WDV,
STATE_UNCACHED_DONE} statetype;
statetype CurrState, NextState;
// Last word index of a cache block; FetchCountFlag is raised when the counter reaches it.
localparam FetchCountThreshold = WORDSPERLINE - 1;
// A CPU request is any load/store (MemRWM) or any atomic (AtomicM).
assign AnyCPUReqM = |MemRWM | (|AtomicM);
assign FetchCountFlag = (FetchCount == FetchCountThreshold);
// Word fetch counter: cleared by reset or by the FSM's CntReset, advanced by one each
// cycle CntEn is high.
flopenr #(LOGWPL+1)
FetchCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextFetchCount),
.q(FetchCount));
assign NextFetchCount = FetchCount + 1'b1;
// The SRAM is written either by a whole-block write requested in the M stage or by a
// word write that has been pipelined into the W stage.
assign SRAMWriteEnable = SRAMBlockWriteEnableM | SRAMWordWriteEnableW;
// Pipeline the M-stage SRAM write controls into the W stage.
// Width = word write enable (1) + set/clear valid and set/clear dirty (4) + AtomicM (2).
// The .q side must drive the W-stage copies; driving the M-stage names here would
// collide with the FSM's combinational assignments and leave the W signals undriven.
flopr #(1+4+2)
  SRAMWritePipeReg(.clk(clk),
                   .reset(reset),
                   .d({SRAMWordWriteEnableM, SetValidM, ClearValidM, SetDirtyM, ClearDirtyM, AtomicM}),
                   .q({SRAMWordWriteEnableW, SetValidW, ClearValidW, SetDirtyW, ClearDirtyW, AtomicW}));
// fsm state regs
flopenl #(.TYPE(statetype))
FSMReg(.clk(clk),
.load(reset),
.en(1'b1),
.val(STATE_READY),
.d(NextState),
.q(CurrState));
// Next state logic and some state outputs of the data cache FSM.
//
// Every signal driven by this block, including NextState and SelAMOWrite, is given a
// default before the case statement so that no path through the block infers a latch.
// States that are not implemented yet fall into the default arm and return to
// STATE_READY through the NextState default.
always_comb begin
  NextState = STATE_READY;
  DCacheStall = 1'b0;
  SelAdrM = 1'b0;
  PreCntEn = 1'b0;
  SetValidM = 1'b0;
  ClearValidM = 1'b0;
  SetDirtyM = 1'b0;
  ClearDirtyM = 1'b0;
  SelMemWriteDataM = 1'b0;
  SRAMWordWriteEnableM = 1'b0;
  SRAMBlockWriteEnableM = 1'b0;
  SaveSRAMRead = 1'b1;
  CntReset = 1'b0;
  SelAMOWrite = 1'b0;

  case (CurrState)
    STATE_READY: begin
      // sram busy: a pipelined word write is using the SRAM this cycle
      if (AnyCPUReqM & SRAMWordWriteEnableW) begin
        NextState = STATE_SRAM_BUSY;
        DCacheStall = 1'b1;
      end
      // TLB Miss
      else if (AnyCPUReqM & DTLBMissM) begin
        NextState = STATE_PTW_MISS_FETCH_WDV;
      end
      // amo hit
      else if ((|AtomicM) & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_AMO_UPDATE;
        DCacheStall = 1'b1;
      end
      // read hit valid cached
      else if (MemRWM[1] & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_READY;
        DCacheStall = 1'b0;
      end
      // write hit valid cached
      else if (MemRWM[0] & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_READY;
        DCacheStall = 1'b0;
        SRAMWordWriteEnableM = 1'b1;
        SetDirtyM = 1'b1;
      end
      // read miss valid cached
      else if (MemRWM[1] & ~UncachedM & ~FaultM & ~CacheHit & ~DTLBMissM) begin
        NextState = STATE_READ_MISS_FETCH_WDV;
        CntReset = 1'b1;
        DCacheStall = 1'b1;
      end
      // fault
      else if ((|MemRWM) & FaultM & ~DTLBMissM) begin
        NextState = STATE_READY;
      end
      // no request (or a case not handled yet): stay in STATE_READY via the default
    end

    STATE_AMO_UPDATE: begin
      NextState = STATE_AMO_WRITE;
      SaveSRAMRead = 1'b1;
      SRAMWordWriteEnableM = 1'b1; // pipelined 1 cycle
    end

    STATE_AMO_WRITE: begin
      NextState = STATE_READY;
      SelAMOWrite = 1'b1;
    end

    STATE_READ_MISS_FETCH_WDV: begin
      DCacheStall = 1'b1;
      PreCntEn = 1'b1;
      // leave once the last word of the block has been acknowledged
      if (FetchCountFlag & AHBAck) begin
        NextState = STATE_READ_MISS_FETCH_DONE;
      end else begin
        NextState = STATE_READ_MISS_FETCH_WDV;
      end
    end

    STATE_READ_MISS_FETCH_DONE: begin
      DCacheStall = 1'b1;
      NextState = STATE_READ_MISS_CHECK_EVICTED_DIRTY;
    end

    STATE_PTW_MISS_FETCH_WDV: begin
      DCacheStall = 1'b1;
      SelAdrM = 1'b1; // index the SRAM with MemPAdrM (d1 of AdrSelMux)
      if (FetchCountFlag & AHBAck) begin
        NextState = STATE_PTW_MISS_FETCH_DONE;
      end else begin
        NextState = STATE_PTW_MISS_FETCH_WDV;
      end
    end

    default: begin
      // unimplemented states: all outputs keep their defaults, NextState = STATE_READY
    end
  endcase
end
assign CntEn = PreCntEn & AHBAck;
endmodule; // dcache endmodule; // dcache