This d cache fsm is getting complex.

This commit is contained in:
Ross Thompson 2021-07-08 15:26:16 -05:00
parent adcc7afffa
commit 4c5aee3042
2 changed files with 298 additions and 90 deletions

View File

@ -413,6 +413,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256)
assign NextFetchCount = FetchCount + 1'b1; assign NextFetchCount = FetchCount + 1'b1;
// This part is confusing. // This part is confusing.
// *** Ross Thompson reduce the complexity. This is just dumb.
// we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide
// we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros. // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros.
// fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with

View File

@ -27,103 +27,108 @@
module dcache module dcache
(input logic clk, (input logic clk,
input logic reset, input logic reset,
input logic StallM, input logic StallM,
input logic StallW, input logic StallW,
input logic FlushM, input logic FlushM,
input logic FlushW, input logic FlushW,
// cpu side // cpu side
input logic [1:0] MemRWM, input logic [1:0] MemRWM,
input logic [2:0] Funct3M, input logic [2:0] Funct3M,
input logic [1:0] AtomicM, input logic [1:0] AtomicM,
input logic [`PA_BITS-1:0] MemAdrE, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] MemAdrE, // virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] MemPAdrM, // physical address input logic [`PA_BITS-1:0] MemPAdrM, // physical address
input logic [`XLEN-1:0] WriteDataM, input logic [`XLEN-1:0] WriteDataM,
output logic [`XLEN-1:0] ReadDataW, output logic [`XLEN-1:0] ReadDataW,
output logic DCacheStall, output logic DCacheStall,
// inputs from TLB and PMA/P // inputs from TLB and PMA/P
input logic FaultM, input logic FaultM,
input logic DTLBMissM, input logic DTLBMissM,
input logic UncachedM,
// ahb side // ahb side
output logic [`PA_BITS-1:0] AHBPAdr, // to ahb output logic [`PA_BITS-1:0] AHBPAdr, // to ahb
output logic AHBRead, output logic AHBRead,
output logic AHBWrite, output logic AHBWrite,
input logic AHBAck, // from ahb input logic AHBAck, // from ahb
input logic [`XLEN-1:0] HRDATA, // from ahb input logic [`XLEN-1:0] HRDATA, // from ahb
output logic [`XLEN-1:0] HWDATA, // to ahb output logic [`XLEN-1:0] HWDATA, // to ahb
output logic [2:0] AHBSize output logic [2:0] AHBSize
); );
localparam integer BLOCKLEN = 256; localparam integer BLOCKLEN = 256;
localparam integer NUMLINES = 512; localparam integer NUMLINES = 512;
localparam integer NUMWAYS = 4; localparam integer NUMWAYS = 4;
localparam integer NUMREPL_BITS = 3; localparam integer NUMREPL_BITS = 3;
localparam integer BLOCKBYTELEN = BLOCKLEN/8; localparam integer BLOCKBYTELEN = BLOCKLEN/8;
localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN);
localparam integer INDEXLEN = $clog2(NUMLINES); localparam integer INDEXLEN = $clog2(NUMLINES);
localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN; localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN;
localparam integer WORDSPERLINE = BLOCKLEN/`XLEN; localparam integer WORDSPERLINE = BLOCKLEN/`XLEN;
localparam integer LOGWPL = $clog2(WORDSPERLINE);
logic [1:0] AdrSel;
logic [`PA_BITS-1:0] MemPAdrW; logic SelAdrM;
logic [INDEXLEN-1:0] SRAMAdr; logic [`PA_BITS-1:0] MemPAdrW;
logic [NUMWAYS-1:0] WriteEnable; logic [INDEXLEN-1:0] SRAMAdr;
logic [NUMWAYS-1:0] WriteWordEnable; logic [NUMWAYS-1:0] WriteEnable;
logic [BLOCKLEN-1:0] SRAMWriteData; logic [NUMWAYS-1:0] WriteWordEnable;
logic [TAGLEN-1:0] WriteTag; logic [BLOCKLEN-1:0] SRAMWriteData;
logic SetValid, ClearValid; logic SetValidM, ClearValidM, SetValidW, ClearValidW;
logic SetDirty, ClearDirty; logic SetDirtyM, ClearDirtyM, SetDirtyW, ClearDirtyW;
logic [BLOCKLEN-1:0] ReadDataM, ReadDataMaskedM [NUMWAYS-1:0]; logic [BLOCKLEN-1:0] ReadDataM, ReadDataMaskedM [NUMWAYS-1:0];
logic [TAGLEN-1:0] TagData [NUMWAYS-1:0]; logic [TAGLEN-1:0] TagData [NUMWAYS-1:0];
logic [NUMWAYS-1:0] Valid, Dirty, WayHit; logic [NUMWAYS-1:0] Valid, Dirty, WayHit;
logic Hit; logic CacheHit;
logic [NUMREPL_BITS-1:0] ReplacementBits, NewReplacement; logic [NUMREPL_BITS-1:0] ReplacementBits, NewReplacement;
logic [BLOCKLEN-1:0] ReadDataSelectWayM; logic [BLOCKLEN-1:0] ReadDataSelectWayM;
logic [`XLEN-1:0] ReadDataSelectWayXLEN [(WORDSPERLINE)-1:0]; logic [`XLEN-1:0] ReadDataSelectWayXLEN [(WORDSPERLINE)-1:0];
logic [`XLEN-1:0] WordReadDataM, FinalReadDataM; logic [`XLEN-1:0] WordReadDataM, FinalReadDataM;
logic [`XLEN-1:0] WriteDataW, FinalWriteDataW; logic [`XLEN-1:0] WriteDataW, FinalWriteDataW, FinalAMOWriteDataW;
logic [BLOCKLEN-1:0] FinalWriteDataWordsW; logic [BLOCKLEN-1:0] FinalWriteDataWordsW;
// Internal signals added for the fetch counter, the SRAM write pipeline and
// the AMO path.
//
// Fixes relative to the previous revision of these declarations:
//  * SRAMWordWriteEnableM/W were declared twice (once as a NUMWAYS-wide vector,
//    once as a scalar). They are used as 1-bit signals (mux select, 1-bit field
//    of SRAMWritePipeReg, assigned 1'b0/1'b1 in the FSM), so only the scalar
//    declaration is kept.
//  * The earlier copy of the statetype enum and of CurrState/NextState is
//    removed; the control-path section further down declares the complete
//    state list that the next-state logic actually uses.
//  * SelAMOWrite is driven by the FSM and read by the AMO write-data mux but
//    had no declaration.
logic [LOGWPL:0] FetchCount, NextFetchCount;
logic [WORDSPERLINE-1:0] SRAMWordEnable [NUMWAYS-1:0];
logic SelMemWriteDataM, SelMemWriteDataW;
logic [2:0] Funct3W;
logic SRAMWordWriteEnableM, SRAMWordWriteEnableW;
logic SRAMBlockWriteEnableM;
// NOTE(review): SRAMWriteEnable is a scalar here but is indexed per way where
// it is connected to DCacheMem - confirm the intended width.
logic SRAMWriteEnable;
logic SaveSRAMRead;
logic [1:0] AtomicW;
logic SelAMOWrite;
// data path
flopen #(`PA_BITS) MemPAdrWReg(.clk(clk), flopen #(`PA_BITS) MemPAdrWReg(.clk(clk),
.en(~StallW), .en(~StallW),
.d(MemPAdrM), .d(MemPAdrM),
.q(MemPAdrW)); .q(MemPAdrW));
mux3 #(INDEXLEN) mux2 #(INDEXLEN)
AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSET-1:OFFSET]), AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSET-1:OFFSET]),
.d1(MemPAdrM[INDEXLEN+OFFSET-1:OFFSET]), .d1(MemPAdrM[INDEXLEN+OFFSET-1:OFFSET]),
.d2(MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]), .s(SelAdrM),
.s(AdrSel), .y(AdrMuxOut));
.y(SRAMAdr));
genvar way;
mux2 #(INDEXLEN)
SelAdrlMux2(.d0(AdrMuxOut),
.d1(MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]),
.s(SRAMWordWriteEnableW),
.y(SRAMAdr));
genvar way;
generate generate
for(way = 0; way < NUMWAYS; way = way + 1) begin for(way = 0; way < NUMWAYS; way = way + 1) begin
DCacheMem #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN)) DCacheMem #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN))
@ -134,11 +139,11 @@ module dcache
.WriteEnable(SRAMWriteEnable[way]), .WriteEnable(SRAMWriteEnable[way]),
.WriteWordEnable(SRAMWordEnable[way]), .WriteWordEnable(SRAMWordEnable[way]),
.WriteData(SRAMWriteData), .WriteData(SRAMWriteData),
.WriteTag(WriteTag), .WriteTag(MemPAdrW[`PA_BITS-1:OFFSET+INDEXLEN]),
.SetValid(SetValid), .SetValid(SetValidW),
.ClearValid(ClearValid), .ClearValid(ClearValidW),
.SetDirty(SetDirty), .SetDirty(SetDirtyW),
.ClearDirty(ClearDirty), .ClearDirty(ClearDirtyW),
.ReadData(ReadDataM[way]), .ReadData(ReadDataM[way]),
.ReadTag(ReadTag[way]), .ReadTag(ReadTag[way]),
.Valid(Valid[way]), .Valid(Valid[way]),
@ -150,10 +155,13 @@ module dcache
always_ff @(posedge clk, posedge reset) begin always_ff @(posedge clk, posedge reset) begin
if (reset) ReplacementBits <= '0; if (reset) ReplacementBits <= '0;
else if (WriteEnable) ReplacementBits[MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]] <= NewReplacement; else if (SRAMWriteEnable) ReplacementBits[MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]] <= NewReplacement;
end end
assign Hit = |WayHit; // *** TODO add replacement policy
assign NewReplacement = '0;
assign CacheHit = |WayHit;
assign ReadDataSelectWayM = |ReadDataMaskedM; // second part of AO mux. assign ReadDataSelectWayM = |ReadDataMaskedM; // second part of AO mux.
// Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can
@ -184,12 +192,28 @@ module dcache
.d(WriteDataM), .d(WriteDataM),
.q(WriteDataW)); .q(WriteDataW));
flopr #(3) Funct3WReg(.clk(clk),
.reset(reset),
.d(Funct3M),
.q(Funct3W));
subwordwrite subwordwrite(.HRDATA(ReadDataW), subwordwrite subwordwrite(.HRDATA(ReadDataW),
.HADDRD(MemPAdrM[`XLEN/8-1:0]), .HADDRD(MemPAdrM[`XLEN/8-1:0]),
.HSIZED(Funct3W), .HSIZED(Funct3W),
.HWDATAIN(WriteDataW), .HWDATAIN(WriteDataW),
.HWDATA(FinalWriteDataW)); .HWDATA(FinalWriteDataW));
// Select the data written back to the cache for atomic memory operations.
// With the A extension enabled, amoalu combines the word read from the cache
// (ReadDataW) with the CPU operand (WriteDataW); its result replaces the
// ordinary sub-word write data whenever SelAMOWrite and AtomicW[1] are both
// set. Without the A extension the ordinary write data passes straight through.
// NOTE(review): Funct7W has no declaration or driver in the visible part of
// this module - confirm where it comes from.
generate
  if (`A_SUPPORTED) begin
    logic [`XLEN-1:0] AMOResult;
    amoalu amoalu(.srca(ReadDataW),
                  .srcb(WriteDataW),
                  .funct(Funct7W),
                  .width(Funct3W),
                  .result(AMOResult));
    mux2 #(`XLEN) wdmux(.d0(FinalWriteDataW),
                        .d1(AMOResult),
                        .s(SelAMOWrite & AtomicW[1]),
                        .y(FinalAMOWriteDataW));
  end else
    assign FinalAMOWriteDataW = FinalWriteDataW;
endgenerate
// register the fetch data from the next level of memory. // register the fetch data from the next level of memory.
generate generate
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
@ -200,22 +224,205 @@ module dcache
end end
endgenerate endgenerate
// AHB address generation.
//
// FetchCount counts XLEN-wide bus words, so it has to be scaled to a byte
// offset before it is added to the physical address. The scale factor is the
// number of byte-address bits per bus word, $clog2(`XLEN/8), not the number of
// bytes per word (`XLEN/8), which the previous expression used as the shift
// amount.
//
// The FetchCountReg instance and the NextFetchCount assignment that used to sit
// here were an exact duplicate of the ones in the control-path section below
// (same instance name, same driven net), and they referenced CntEn/CntReset
// before their declaration; only the control-path copy is kept.
//
// NOTE(review): MemPAdrM is added unmasked, so the block offset bits of the
// request address are included in every beat - confirm whether the base should
// be aligned to the cache block for line fetches.
assign AHBPAdr = (FetchCount << $clog2(`XLEN/8)) + MemPAdrM;
// remove later
assign AHBSize = 3'b000;
// mux between the CPU's write and the cache fetch. // mux between the CPU's write and the cache fetch.
generate generate
for(index = 0; index < WORDSPERLINE; index++) begin for(index = 0; index < WORDSPERLINE; index++) begin
assign FinalWriteDataWordsW[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalWriteDataW; assign FinalWriteDataWordsW[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalAMOWriteDataW;
end end
endgenerate endgenerate
mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsW), mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsW),
.d1(DCacheMemWriteData), .d1(DCacheMemWriteData),
.s(SelMemWriteData), .s(SRAMBlockWriteEnableM),
.y(SRAMWriteData)); .y(SRAMWriteData));
// control path *** eventually move to own module.
logic AnyCPUReqM;
logic FetchCountFlag;
logic PreCntEn;
logic CntEn;
logic CntReset;
// States of the D-cache controller FSM. The entries are grouped by name prefix
// (READ_MISS, WRITE_MISS, AMO_MISS, AMO, PTW, UNCACHED); the next-state logic
// below currently implements transitions for only a subset of them and leaves
// the rest to its default branch.
typedef enum {STATE_READY,
// read miss group
STATE_READ_MISS_FETCH_WDV,
STATE_READ_MISS_FETCH_DONE,
STATE_READ_MISS_CHECK_EVICTED_DIRTY,
STATE_READ_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_READ_MISS_WRITE_CACHE_BLOCK,
STATE_READ_MISS_READ_WORD,
// write miss group
STATE_WRITE_MISS_FETCH_WDV,
STATE_WRITE_MISS_FETCH_DONE,
STATE_WRITE_MISS_CHECK_EVICTED_DIRTY,
STATE_WRITE_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_WRITE_MISS_WRITE_CACHE_BLOCK,
STATE_WRITE_MISS_WRITE_WORD,
// AMO miss group
STATE_AMO_MISS_FETCH_WDV,
STATE_AMO_MISS_FETCH_DONE,
STATE_AMO_MISS_CHECK_EVICTED_DIRTY,
STATE_AMO_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_AMO_MISS_WRITE_CACHE_BLOCK,
STATE_AMO_MISS_READ_WORD,
STATE_AMO_MISS_UPDATE_WORD,
STATE_AMO_MISS_WRITE_WORD,
// AMO hit: STATE_AMO_UPDATE is followed by STATE_AMO_WRITE, then STATE_READY
STATE_AMO_UPDATE,
STATE_AMO_WRITE,
STATE_SRAM_BUSY,
// PTW group; STATE_PTW_MISS_FETCH_WDV is entered from STATE_READY on a DTLB miss
STATE_PTW_READY,
STATE_PTW_MISS_FETCH_WDV,
STATE_PTW_MISS_FETCH_DONE,
STATE_PTW_MISS_CHECK_EVICTED_DIRTY,
STATE_PTW_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_PTW_MISS_WRITE_CACHE_BLOCK,
STATE_PTW_MISS_READ_SRAM,
// uncached group
STATE_UNCACHED_WDV,
STATE_UNCACHED_DONE} statetype;
// current state (registered in FSMReg) and combinational next state
statetype CurrState, NextState;
localparam FetchCountThreshold = WORDSPERLINE - 1;
assign AnyCPUReqM = |MemRWM | (|AtomicM);
assign FetchCountFlag = (FetchCount == FetchCountThreshold);
flopenr #(LOGWPL+1)
FetchCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextFetchCount),
.q(FetchCount));
assign NextFetchCount = FetchCount + 1'b1;
assign SRAMWriteEnable = SRAMBlockWriteEnableM | SRAMWordWriteEnableW;
// Pipeline the SRAM word-write controls from the M stage to the W stage:
// 1 bit word-write enable + 4 valid/dirty set/clear bits + 2 bits of AtomicM.
// Fixes: "SetDiryM" was a misspelling of SetDirtyM, and the .q side listed
// M-stage names (which are driven by the FSM) instead of the W-stage signals
// that DCacheMem consumes, leaving ClearValidW/SetDirtyW/ClearDirtyW undriven.
flopr #(1+4+2)
  SRAMWritePipeReg(.clk(clk),
                   .reset(reset),
                   .d({SRAMWordWriteEnableM, SetValidM, ClearValidM, SetDirtyM, ClearDirtyM, AtomicM}),
                   .q({SRAMWordWriteEnableW, SetValidW, ClearValidW, SetDirtyW, ClearDirtyW, AtomicW}));
// fsm state regs
flopenl #(.TYPE(statetype))
FSMReg(.clk(clk),
.load(reset),
.en(1'b1),
.val(STATE_READY),
.d(NextState),
.q(CurrState));
// next state logic and some state outputs.
// Next-state and output logic of the D-cache controller.
//
// Every output gets a default at the top so that no branch can infer a latch;
// each state then overrides only what it needs.
//
// Fixes relative to the previous revision:
//  * NextState had no default, so an idle STATE_READY and every state handled
//    by the default branch inferred a latch. It now defaults to STATE_READY.
//  * SelAMOWrite was assigned only in STATE_AMO_WRITE; it now defaults to 0.
//  * SelAdrM is a 1-bit mux select and is now assigned 1-bit literals.
//  * STATE_BUSY is not a member of statetype; the enum entry is
//    STATE_SRAM_BUSY.
//  * The AMO-hit condition tested ~FSMReg, which is the name of the state
//    register instance; like the other hit/miss branches it must test ~FaultM.
//  * STATE_PTW_MISS_FETCH_WDV drove AdrSel, the select of the former 3-input
//    address mux. The address mux is now the 2-input AdrSelMux controlled by
//    SelAdrM, where 1 selects MemPAdrM (the old AdrSel == 2'b01 choice).
//
// NOTE(review): SelAMOWrite has no declaration in the visible part of this
// module - confirm it is declared alongside the other control signals.
// NOTE(review): STATE_PTW_MISS_FETCH_WDV waits on FetchCountFlag but does not
// assert PreCntEn or CntReset - confirm the counter is meant to run there.
always_comb begin
  // defaults
  NextState = STATE_READY;
  DCacheStall = 1'b0;
  SelAdrM = 1'b0;
  PreCntEn = 1'b0;
  SetValidM = 1'b0;
  ClearValidM = 1'b0;
  SetDirtyM = 1'b0;
  ClearDirtyM = 1'b0;
  SelMemWriteDataM = 1'b0;
  SRAMWordWriteEnableM = 1'b0;
  SRAMBlockWriteEnableM = 1'b0;
  SaveSRAMRead = 1'b1;
  CntReset = 1'b0;
  SelAMOWrite = 1'b0;

  case (CurrState)
    STATE_READY: begin
      // sram busy: the W-stage word write owns the SRAM this cycle
      if (AnyCPUReqM & SRAMWordWriteEnableW) begin
        NextState = STATE_SRAM_BUSY;
        DCacheStall = 1'b1;
      end
      // TLB Miss
      else if (AnyCPUReqM & DTLBMissM) begin
        NextState = STATE_PTW_MISS_FETCH_WDV;
      end
      // amo hit
      else if (|AtomicM & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_AMO_UPDATE;
        DCacheStall = 1'b1;
      end
      // read hit valid cached
      else if (MemRWM[1] & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_READY;
        DCacheStall = 1'b0;
      end
      // write hit valid cached
      else if (MemRWM[0] & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_READY;
        DCacheStall = 1'b0;
        SRAMWordWriteEnableM = 1'b1;
        SetDirtyM = 1'b1;
      end
      // read miss valid cached
      else if (MemRWM[1] & ~UncachedM & ~FaultM & ~CacheHit & ~DTLBMissM) begin
        NextState = STATE_READ_MISS_FETCH_WDV;
        CntReset = 1'b1;
        DCacheStall = 1'b1;
      end
      // fault
      else if (|MemRWM & FaultM & ~DTLBMissM) begin
        NextState = STATE_READY;
      end
    end

    STATE_AMO_UPDATE: begin
      NextState = STATE_AMO_WRITE;
      SaveSRAMRead = 1'b1;
      SRAMWordWriteEnableM = 1'b1; // pipelined 1 cycle
    end

    STATE_AMO_WRITE: begin
      NextState = STATE_READY;
      SelAMOWrite = 1'b1;
    end

    STATE_READ_MISS_FETCH_WDV: begin
      DCacheStall = 1'b1;
      PreCntEn = 1'b1;
      if (FetchCountFlag & AHBAck) begin
        NextState = STATE_READ_MISS_FETCH_DONE;
      end else begin
        NextState = STATE_READ_MISS_FETCH_WDV;
      end
    end

    STATE_READ_MISS_FETCH_DONE: begin
      DCacheStall = 1'b1;
      NextState = STATE_READ_MISS_CHECK_EVICTED_DIRTY;
    end

    STATE_PTW_MISS_FETCH_WDV: begin
      DCacheStall = 1'b1;
      SelAdrM = 1'b1;
      if (FetchCountFlag & AHBAck) begin
        NextState = STATE_PTW_MISS_FETCH_DONE;
      end else begin
        NextState = STATE_PTW_MISS_FETCH_WDV;
      end
    end

    // states without an implementation yet fall back to the defaults above
    default: begin
    end
  endcase
end
assign CntEn = PreCntEn & AHBAck;
endmodule; // dcache endmodule; // dcache