This D-cache FSM is getting complex.

This commit is contained in:
Ross Thompson 2021-07-08 15:26:16 -05:00
parent adcc7afffa
commit 4c5aee3042
2 changed files with 298 additions and 90 deletions

View File

@ -413,6 +413,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256)
assign NextFetchCount = FetchCount + 1'b1; assign NextFetchCount = FetchCount + 1'b1;
// This part is confusing. // This part is confusing.
// *** Ross Thompson reduce the complexity. This is just dumb.
// we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide // we need to remove the offset bits (PCPTrunkF). Because the AHB interface is XLEN wide
// we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros. // we need to address on that number of bits so the PC is extended to the right by AHBByteLength with zeros.
// fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with // fetch count is already aligned to AHBByteLength, but we need to extend back to the full address width with

View File

@ -47,6 +47,7 @@ module dcache
// inputs from TLB and PMA/P // inputs from TLB and PMA/P
input logic FaultM, input logic FaultM,
input logic DTLBMissM, input logic DTLBMissM,
input logic UncachedM,
// ahb side // ahb side
output logic [`PA_BITS-1:0] AHBPAdr, // to ahb output logic [`PA_BITS-1:0] AHBPAdr, // to ahb
output logic AHBRead, output logic AHBRead,
@ -67,62 +68,66 @@ module dcache
localparam integer INDEXLEN = $clog2(NUMLINES); localparam integer INDEXLEN = $clog2(NUMLINES);
localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN; localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN;
localparam integer WORDSPERLINE = BLOCKLEN/`XLEN; localparam integer WORDSPERLINE = BLOCKLEN/`XLEN;
localparam integer LOGWPL = $clog2(WORDSPERLINE);
logic [1:0] AdrSel;
logic SelAdrM;
logic [`PA_BITS-1:0] MemPAdrW; logic [`PA_BITS-1:0] MemPAdrW;
logic [INDEXLEN-1:0] SRAMAdr; logic [INDEXLEN-1:0] SRAMAdr;
logic [NUMWAYS-1:0] WriteEnable; logic [NUMWAYS-1:0] WriteEnable;
logic [NUMWAYS-1:0] WriteWordEnable; logic [NUMWAYS-1:0] WriteWordEnable;
logic [BLOCKLEN-1:0] SRAMWriteData; logic [BLOCKLEN-1:0] SRAMWriteData;
logic [TAGLEN-1:0] WriteTag; logic SetValidM, ClearValidM, SetValidW, ClearValidW;
logic SetValid, ClearValid; logic SetDirtyM, ClearDirtyM, SetDirtyW, ClearDirtyW;
logic SetDirty, ClearDirty;
logic [BLOCKLEN-1:0] ReadDataM, ReadDataMaskedM [NUMWAYS-1:0]; logic [BLOCKLEN-1:0] ReadDataM, ReadDataMaskedM [NUMWAYS-1:0];
logic [TAGLEN-1:0] TagData [NUMWAYS-1:0]; logic [TAGLEN-1:0] TagData [NUMWAYS-1:0];
logic [NUMWAYS-1:0] Valid, Dirty, WayHit; logic [NUMWAYS-1:0] Valid, Dirty, WayHit;
logic Hit; logic CacheHit;
logic [NUMREPL_BITS-1:0] ReplacementBits, NewReplacement; logic [NUMREPL_BITS-1:0] ReplacementBits, NewReplacement;
logic [BLOCKLEN-1:0] ReadDataSelectWayM; logic [BLOCKLEN-1:0] ReadDataSelectWayM;
logic [`XLEN-1:0] ReadDataSelectWayXLEN [(WORDSPERLINE)-1:0]; logic [`XLEN-1:0] ReadDataSelectWayXLEN [(WORDSPERLINE)-1:0];
logic [`XLEN-1:0] WordReadDataM, FinalReadDataM; logic [`XLEN-1:0] WordReadDataM, FinalReadDataM;
logic [`XLEN-1:0] WriteDataW, FinalWriteDataW; logic [`XLEN-1:0] WriteDataW, FinalWriteDataW, FinalAMOWriteDataW;
logic [BLOCKLEN-1:0] FinalWriteDataWordsW; logic [BLOCKLEN-1:0] FinalWriteDataWordsW;
logic [LOGWPL:0] FetchCount, NextFetchCount;
logic [NUMWAYS-1:0] SRAMWordWriteEnableM, SRAMWordWriteEnableW;
logic [WORDSPERLINE-1:0] SRAMWordEnable [NUMWAYS-1:0];
logic SelMemWriteDataM, SelMemWriteDataW;
logic [2:0] Funct3W;
logic SRAMWordWriteEnableM, SRAMWordWriteEnableW;
logic SRAMBlockWriteEnableM;
logic SRAMWriteEnable;
logic SaveSRAMRead;
logic [1:0] AtomicW;
typedef enum {STATE_READY,
STATE_MISS_FETCH_WDV,
STATE_MISS_FETCH_DONE,
STATE_MISS_WRITE_BACK,
STATE_MISS_READ_SRAM,
STATE_AMO_MISS_FETCH_WDV,
STATE_AMO_MISS_FETCH_DONE,
STATE_AMO_MISS_WRITE_BACK,
STATE_AMO_MISS_READ_SRAM,
STATE_AMO_MISS_UPDATE,
STATE_AMO_MISS_WRITE,
STATE_AMO_UPDATE,
STATE_AMO_WRITE,
STATE_SRAM_BUSY,
STATE_PTW_READY,
STATE_PTW_FETCH,
STATE_UNCACHED} statetype;
statetype CurrState, NextState;
// data path
flopen #(`PA_BITS) MemPAdrWReg(.clk(clk), flopen #(`PA_BITS) MemPAdrWReg(.clk(clk),
.en(~StallW), .en(~StallW),
.d(MemPAdrM), .d(MemPAdrM),
.q(MemPAdrW)); .q(MemPAdrW));
mux3 #(INDEXLEN) mux2 #(INDEXLEN)
AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSET-1:OFFSET]), AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSET-1:OFFSET]),
.d1(MemPAdrM[INDEXLEN+OFFSET-1:OFFSET]), .d1(MemPAdrM[INDEXLEN+OFFSET-1:OFFSET]),
.d2(MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]), .s(SelAdrM),
.s(AdrSel), .y(AdrMuxOut));
mux2 #(INDEXLEN)
SelAdrlMux2(.d0(AdrMuxOut),
.d1(MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]),
.s(SRAMWordWriteEnableW),
.y(SRAMAdr)); .y(SRAMAdr));
genvar way; genvar way;
generate generate
for(way = 0; way < NUMWAYS; way = way + 1) begin for(way = 0; way < NUMWAYS; way = way + 1) begin
@ -134,11 +139,11 @@ module dcache
.WriteEnable(SRAMWriteEnable[way]), .WriteEnable(SRAMWriteEnable[way]),
.WriteWordEnable(SRAMWordEnable[way]), .WriteWordEnable(SRAMWordEnable[way]),
.WriteData(SRAMWriteData), .WriteData(SRAMWriteData),
.WriteTag(WriteTag), .WriteTag(MemPAdrW[`PA_BITS-1:OFFSET+INDEXLEN]),
.SetValid(SetValid), .SetValid(SetValidW),
.ClearValid(ClearValid), .ClearValid(ClearValidW),
.SetDirty(SetDirty), .SetDirty(SetDirtyW),
.ClearDirty(ClearDirty), .ClearDirty(ClearDirtyW),
.ReadData(ReadDataM[way]), .ReadData(ReadDataM[way]),
.ReadTag(ReadTag[way]), .ReadTag(ReadTag[way]),
.Valid(Valid[way]), .Valid(Valid[way]),
@ -150,10 +155,13 @@ module dcache
always_ff @(posedge clk, posedge reset) begin always_ff @(posedge clk, posedge reset) begin
if (reset) ReplacementBits <= '0; if (reset) ReplacementBits <= '0;
else if (WriteEnable) ReplacementBits[MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]] <= NewReplacement; else if (SRAMWriteEnable) ReplacementBits[MemPAdrW[INDEXLEN+OFFSET-1:OFFSET]] <= NewReplacement;
end end
assign Hit = |WayHit; // *** TODO add replacement policy
assign NewReplacement = '0;
assign CacheHit = |WayHit;
assign ReadDataSelectWayM = |ReadDataMaskedM; // second part of AO mux. assign ReadDataSelectWayM = |ReadDataMaskedM; // second part of AO mux.
// Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can
@ -184,12 +192,28 @@ module dcache
.d(WriteDataM), .d(WriteDataM),
.q(WriteDataW)); .q(WriteDataW));
flopr #(3) Funct3WReg(.clk(clk),
.reset(reset),
.d(Funct3M),
.q(Funct3W));
subwordwrite subwordwrite(.HRDATA(ReadDataW), subwordwrite subwordwrite(.HRDATA(ReadDataW),
.HADDRD(MemPAdrM[`XLEN/8-1:0]), .HADDRD(MemPAdrM[`XLEN/8-1:0]),
.HSIZED(Funct3W), .HSIZED(Funct3W),
.HWDATAIN(WriteDataW), .HWDATAIN(WriteDataW),
.HWDATA(FinalWriteDataW)); .HWDATA(FinalWriteDataW));
generate
if (`A_SUPPORTED) begin
logic [`XLEN-1:0] AMOResult;
amoalu amoalu(.srca(ReadDataW), .srcb(WriteDataW), .funct(Funct7W), .width(Funct3W),
.result(AMOResult));
mux2 #(`XLEN) wdmux(FinalWriteDataW, AMOResult, SelAMOWrite & AtomicW[1], FinalAMOWriteDataW);
end else
assign FinalAMOWriteDataW = FinalWriteDataW;
endgenerate
// register the fetch data from the next level of memory. // register the fetch data from the next level of memory.
generate generate
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
@ -200,22 +224,205 @@ module dcache
end end
endgenerate endgenerate
flopenr #(LOGWPL+1)
FetchCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextFetchCount),
.q(FetchCount));
assign NextFetchCount = FetchCount + 1'b1;
assign AHBPAdr = (FetchCount << (`XLEN/8)) + MemPAdrM;
// remove later
assign AHBSize = 3'b000;
// mux between the CPU's write and the cache fetch. // mux between the CPU's write and the cache fetch.
generate generate
for(index = 0; index < WORDSPERLINE; index++) begin for(index = 0; index < WORDSPERLINE; index++) begin
assign FinalWriteDataWordsW[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalWriteDataW; assign FinalWriteDataWordsW[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalAMOWriteDataW;
end end
endgenerate endgenerate
mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsW), mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsW),
.d1(DCacheMemWriteData), .d1(DCacheMemWriteData),
.s(SelMemWriteData), .s(SRAMBlockWriteEnableM),
.y(SRAMWriteData)); .y(SRAMWriteData));
// control path *** eventually move to own module.
// Control-path helper signals.
// AnyCPUReqM:     OR-reduction of MemRWM and AtomicM (any request in the M stage).
// FetchCountFlag: high when FetchCount equals FetchCountThreshold.
// PreCntEn:       count request from the FSM, before gating with AHBAck.
// CntEn:          PreCntEn & AHBAck; enable input of FetchCountReg.
// CntReset:       FSM-driven reset, ORed with reset on FetchCountReg.
logic AnyCPUReqM;
logic FetchCountFlag;
logic PreCntEn;
logic CntEn;
logic CntReset;
// State type for the cache controller FSM, plus the current/next state variables.
// States are grouped by name prefix: READ_MISS, WRITE_MISS, AMO_MISS, AMO,
// SRAM_BUSY, PTW / PTW_MISS and UNCACHED.
// No explicit base type or encoding is given, so the tool picks the encoding.
// NOTE(review): a typedef with the same name (and a second CurrState/NextState
// declaration) appears earlier in this diff; confirm only one of them survives.
typedef enum {STATE_READY,
STATE_READ_MISS_FETCH_WDV,
STATE_READ_MISS_FETCH_DONE,
STATE_READ_MISS_CHECK_EVICTED_DIRTY,
STATE_READ_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_READ_MISS_WRITE_CACHE_BLOCK,
STATE_READ_MISS_READ_WORD,
STATE_WRITE_MISS_FETCH_WDV,
STATE_WRITE_MISS_FETCH_DONE,
STATE_WRITE_MISS_CHECK_EVICTED_DIRTY,
STATE_WRITE_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_WRITE_MISS_WRITE_CACHE_BLOCK,
STATE_WRITE_MISS_WRITE_WORD,
STATE_AMO_MISS_FETCH_WDV,
STATE_AMO_MISS_FETCH_DONE,
STATE_AMO_MISS_CHECK_EVICTED_DIRTY,
STATE_AMO_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_AMO_MISS_WRITE_CACHE_BLOCK,
STATE_AMO_MISS_READ_WORD,
STATE_AMO_MISS_UPDATE_WORD,
STATE_AMO_MISS_WRITE_WORD,
STATE_AMO_UPDATE,
STATE_AMO_WRITE,
STATE_SRAM_BUSY,
STATE_PTW_READY,
STATE_PTW_MISS_FETCH_WDV,
STATE_PTW_MISS_FETCH_DONE,
STATE_PTW_MISS_CHECK_EVICTED_DIRTY,
STATE_PTW_MISS_WRITE_BACK_EVICTED_BLOCK,
STATE_PTW_MISS_WRITE_CACHE_BLOCK,
STATE_PTW_MISS_READ_SRAM,
STATE_UNCACHED_WDV,
STATE_UNCACHED_DONE} statetype;
statetype CurrState, NextState;
// Fetch counter and SRAM write-enable glue.
// FetchCountThreshold is the index of the last word in a line (WORDSPERLINE - 1).
localparam FetchCountThreshold = WORDSPERLINE - 1;
// A CPU request is any read/write (MemRWM) or atomic (AtomicM) bit being set.
assign AnyCPUReqM = |MemRWM | (|AtomicM);
// Flags that the counter has reached the last word of the line.
assign FetchCountFlag = (FetchCount == FetchCountThreshold);
// LOGWPL+1 bit counter; cleared by reset or CntReset, advances by one when CntEn is high.
// NOTE(review): an identical FetchCountReg instance and NextFetchCount assign
// appear earlier in this diff; confirm only one copy remains in the module.
flopenr #(LOGWPL+1)
FetchCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextFetchCount),
.q(FetchCount));
assign NextFetchCount = FetchCount + 1'b1;
// SRAM is written either for a block write (M stage) or a word write (W stage).
assign SRAMWriteEnable = SRAMBlockWriteEnableM | SRAMWordWriteEnableW;
// Pipeline the M-stage SRAM write controls into the W stage.
// Bit layout, MSB to LSB: word write enable (1), set/clear valid (2),
// set/clear dirty (2), atomic (2) -- 1+4+2 = 7 bits.
// The outputs must be the *W signals: the *M signals are driven by the FSM,
// and the W-stage set/clear signals feed the SRAM ways.
// NOTE(review): the width assumes SRAMWordWriteEnableM/W are scalars; they are
// also declared NUMWAYS wide elsewhere in this module -- confirm which is intended.
flopr #(1+4+2)
  SRAMWritePipeReg(.clk(clk),
                   .reset(reset),
                   .d({SRAMWordWriteEnableM, SetValidM, ClearValidM, SetDirtyM, ClearDirtyM, AtomicM}),
                   .q({SRAMWordWriteEnableW, SetValidW, ClearValidW, SetDirtyW, ClearDirtyW, AtomicW}));
// fsm state regs
// Holds CurrState; NextState is presented on d and the enable is tied high.
// NOTE(review): flopenl is defined outside this view. Presumably asserting
// load (reset) loads val (STATE_READY) into the register -- confirm.
flopenl #(.TYPE(statetype))
FSMReg(.clk(clk),
.load(reset),
.en(1'b1),
.val(STATE_READY),
.d(NextState),
.q(CurrState));
// next state logic and some state outputs.
// Next-state logic and FSM-driven control outputs for the cache controller.
// Every signal assigned in this block is given a default first, so that no
// path through the case statement can infer a latch.
always_comb begin
  // defaults
  NextState = STATE_READY;
  DCacheStall = 1'b0;
  SelAdrM = 1'b0;
  SelAMOWrite = 1'b0;
  PreCntEn = 1'b0;
  SetValidM = 1'b0;
  ClearValidM = 1'b0;
  SetDirtyM = 1'b0;
  ClearDirtyM = 1'b0;
  SelMemWriteDataM = 1'b0;
  SRAMWordWriteEnableM = 1'b0;
  SRAMBlockWriteEnableM = 1'b0;
  SaveSRAMRead = 1'b1;
  CntReset = 1'b0;

  case (CurrState)
    STATE_READY: begin
      // sram busy: a word write from the previous access is still using the SRAM.
      if (AnyCPUReqM & SRAMWordWriteEnableW) begin
        NextState = STATE_SRAM_BUSY;
        DCacheStall = 1'b1;
      end
      // TLB Miss
      // NOTE(review): unlike the read-miss branch, this does not assert
      // DCacheStall or CntReset -- confirm that is intended.
      else if (AnyCPUReqM & DTLBMissM) begin
        NextState = STATE_PTW_MISS_FETCH_WDV;
      end
      // amo hit
      else if ((|AtomicM) & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_AMO_UPDATE;
        DCacheStall = 1'b1;
      end
      // read hit valid cached
      else if (MemRWM[1] & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_READY;
        DCacheStall = 1'b0;
      end
      // write hit valid cached
      else if (MemRWM[0] & ~UncachedM & ~FaultM & CacheHit & ~DTLBMissM) begin
        NextState = STATE_READY;
        DCacheStall = 1'b0;
        SRAMWordWriteEnableM = 1'b1;
        SetDirtyM = 1'b1;
      end
      // read miss valid cached
      else if (MemRWM[1] & ~UncachedM & ~FaultM & ~CacheHit & ~DTLBMissM) begin
        NextState = STATE_READ_MISS_FETCH_WDV;
        CntReset = 1'b1;
        DCacheStall = 1'b1;
      end
      // fault
      else if ((|MemRWM) & FaultM & ~DTLBMissM) begin
        NextState = STATE_READY;
      end
      // no request (or an unhandled combination): stay in STATE_READY via the default.
    end

    STATE_AMO_UPDATE: begin
      NextState = STATE_AMO_WRITE;
      SaveSRAMRead = 1'b1;
      SRAMWordWriteEnableM = 1'b1; // pipelined 1 cycle
    end

    STATE_AMO_WRITE: begin
      NextState = STATE_READY;
      SelAMOWrite = 1'b1;
    end

    STATE_READ_MISS_FETCH_WDV: begin
      DCacheStall = 1'b1;
      PreCntEn = 1'b1;
      if (FetchCountFlag & AHBAck) begin
        NextState = STATE_READ_MISS_FETCH_DONE;
      end else begin
        NextState = STATE_READ_MISS_FETCH_WDV;
      end
    end

    STATE_READ_MISS_FETCH_DONE: begin
      DCacheStall = 1'b1;
      NextState = STATE_READ_MISS_CHECK_EVICTED_DIRTY;
    end

    STATE_PTW_MISS_FETCH_WDV: begin
      DCacheStall = 1'b1;
      // Select the M-stage physical address (d1 of AdrSelMux) for the SRAM index.
      SelAdrM = 1'b1;
      if (FetchCountFlag & AHBAck) begin
        NextState = STATE_PTW_MISS_FETCH_DONE;
      end else begin
        NextState = STATE_PTW_MISS_FETCH_WDV;
      end
    end

    // States without a case arm yet fall back to STATE_READY.
    default: begin
      NextState = STATE_READY;
    end
  endcase
end
// The fetch counter advances only when the FSM requests it and AHBAck is high.
assign CntEn = PreCntEn & AHBAck;
endmodule; // dcache endmodule; // dcache