Partial implementation of the data cache. Missing the fsm.

This commit is contained in:
Ross Thompson 2021-07-07 17:52:16 -05:00
parent 412691df2d
commit 1fe06bc670
4 changed files with 330 additions and 186 deletions

107
wally-pipelined/src/cache/DCacheMem.sv vendored Normal file
View File

@ -0,0 +1,107 @@
///////////////////////////////////////////
// DCacheMem (Memory for the Data Cache)
//
// Written: ross1728@gmail.com July 07, 2021
// Implements the data, tag, valid, dirty, and replacement bits.
//
// Purpose: Storage and read/write access to the data cache's data, tag, valid, dirty, and replacement bits.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// DCacheMem: storage for one way of the data cache.
//
// Holds, for each of NUMLINES lines:
//   * the data block, split into BLOCKLEN/`XLEN independently write-enabled word SRAMs,
//   * the tag (one SRAM),
//   * a valid bit and a dirty bit (flops, so that they can be cleared by reset).
//
// Parameters:
//   NUMLINES - number of lines (sets) stored in this way
//   BLOCKLEN - bits per cache block
//   TAGLEN   - bits per tag
//
// Ports:
//   Adr             - line index used for the data/tag SRAMs and for reading Valid/Dirty
//   WAdr            - line index used when updating the valid and dirty bits
//   WriteEnable     - qualifies tag writes, data writes and valid/dirty updates
//   WriteWordEnable - per-word enable for the data SRAMs (ANDed with WriteEnable)
//   WriteData/Tag   - values written when enabled
//   SetValid/ClearValid, SetDirty/ClearDirty - update the flag at WAdr; Set wins over Clear
//   ReadData/ReadTag - SRAM read outputs for line Adr
//   Valid/Dirty      - registered copies of the flags for line Adr
//
// NOTE(review): sram1rw is instantiated with DEPTH = bits per entry and WIDTH = number of
// entries, exactly as in the original code; confirm this matches sram1rw's definition.
module DCacheMem #(parameter NUMLINES=512, parameter BLOCKLEN = 256, TAGLEN = 26)
  (input logic                        clk,
   input logic                        reset,

   input logic [$clog2(NUMLINES)-1:0] Adr,
   input logic [$clog2(NUMLINES)-1:0] WAdr, // write address for valid and dirty only
   input logic                        WriteEnable,
   input logic [BLOCKLEN/`XLEN-1:0]   WriteWordEnable,
   input logic [BLOCKLEN-1:0]         WriteData,
   input logic [TAGLEN-1:0]           WriteTag,
   input logic                        SetValid,
   input logic                        ClearValid,
   input logic                        SetDirty,
   input logic                        ClearDirty,

   output logic [BLOCKLEN-1:0]        ReadData,
   output logic [TAGLEN-1:0]          ReadTag,
   output logic                       Valid,
   output logic                       Dirty
   );

  // Per-line status flags. These were used but never declared in the original.
  logic [NUMLINES-1:0] ValidBits;
  logic [NUMLINES-1:0] DirtyBits;

  // Data array: one `XLEN-wide SRAM per word of the block so that a single word can be
  // written without disturbing the rest of the line.
  genvar words;
  generate
    for (words = 0; words < BLOCKLEN/`XLEN; words++) begin : word
      sram1rw #(.DEPTH(`XLEN),
                .WIDTH(NUMLINES))
      CacheDataMem(.clk(clk),
                   .Addr(Adr),
                   .ReadData(ReadData[(words+1)*`XLEN-1:words*`XLEN]),
                   .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]),
                   .WriteEnable(WriteEnable & WriteWordEnable[words]));
    end
  endgenerate

  // Tag array. The original instantiated a second CacheTagMem and a second, block-wide
  // CacheDataMem; those duplicated the instance names and double-drove ReadTag/ReadData,
  // so only the per-word data SRAMs and this single tag SRAM are kept.
  sram1rw #(.DEPTH(TAGLEN),
            .WIDTH(NUMLINES))
  CacheTagMem(.clk(clk),
              .Addr(Adr),
              .ReadData(ReadTag),
              .WriteData(WriteTag),
              .WriteEnable(WriteEnable));

  // Valid bits: cleared asynchronously by reset, updated at WAdr when WriteEnable is high.
  always_ff @(posedge clk, posedge reset) begin
    if (reset)                         ValidBits <= {NUMLINES{1'b0}};
    else if (SetValid & WriteEnable)   ValidBits[WAdr] <= 1'b1;
    else if (ClearValid & WriteEnable) ValidBits[WAdr] <= 1'b0;
  end

  // Registered read of the valid bit. Kept in its own clocked process so the assignment is
  // no longer evaluated on the asynchronous reset edge.
  // NOTE(review): presumably registered to line up with the SRAM read data; confirm
  // sram1rw's read latency.
  always_ff @(posedge clk) begin
    Valid <= ValidBits[Adr];
  end

  // Dirty bits: same structure as the valid bits.
  always_ff @(posedge clk, posedge reset) begin
    if (reset)                         DirtyBits <= {NUMLINES{1'b0}};
    else if (SetDirty & WriteEnable)   DirtyBits[WAdr] <= 1'b1;
    else if (ClearDirty & WriteEnable) DirtyBits[WAdr] <= 1'b0;
  end

  always_ff @(posedge clk) begin
    Dirty <= DirtyBits[Adr];
  end

endmodule // DCacheMem

View File

@ -8,8 +8,8 @@ module ICacheMem #(parameter NUMLINES=512, parameter BLOCKLEN = 256)
// If flush is high, invalidate the entire cache
input logic flush,
input logic [`PA_BITS-1:0] PCTagF, // physical address
input logic [`PA_BITS-1:0] PCNextIndexF, // virtual address
input logic [`PA_BITS-1:0] PCTagF, // physical address
input logic [`PA_BITS-1:0] PCNextIndexF, // virtual address
input logic WriteEnable,
input logic [BLOCKLEN-1:0] WriteLine,
output logic [BLOCKLEN-1:0] ReadLineF,

221
wally-pipelined/src/cache/dcache.sv vendored Normal file
View File

@ -0,0 +1,221 @@
///////////////////////////////////////////
// dcache (data cache)
//
// Written: ross1728@gmail.com July 07, 2021
// Implements the L1 data cache
//
// Purpose: Storage for data and meta data.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// dcache: datapath of the L1 data cache (set associative, NUMWAYS ways).
//
// This module currently contains only the datapath:
//   * index selection for the SRAMs (AdrSelMux),
//   * one DCacheMem per way plus tag compare and hit detection,
//   * an AND-OR mux selecting the hitting way's block, word selection and sub-word read,
//   * the W-stage write path (sub-word write, replicated across the block),
//   * a fetch buffer that collects HRDATA words into a full block,
//   * the mux choosing between CPU write data and fetched block for the SRAM write port.
//
// TODO: the controlling FSM is not implemented yet. Until it is, the following signals are
// declared but have no driver in this module:
//   AdrSel, SRAMWriteEnable, SRAMWordEnable, WriteTag, SetValid, ClearValid, SetDirty,
//   ClearDirty, NewReplacement, FetchCount, SelMemWriteData, CurrState, NextState,
// and the outputs DCacheStall, AHBPAdr, AHBRead, AHBWrite, HWDATA and AHBSize.
module dcache
  (input logic                 clk,
   input logic                 reset,
   input logic                 StallM,
   input logic                 StallW,
   input logic                 FlushM,
   input logic                 FlushW,

   // cpu side
   input logic [1:0]           MemRWM,
   input logic [2:0]           Funct3M,
   input logic [1:0]           AtomicM,
   input logic [`PA_BITS-1:0]  MemAdrE,  // virtual address, but we only use the lower 12 bits.
   input logic [`PA_BITS-1:0]  MemPAdrM, // physical address
   input logic [`XLEN-1:0]     WriteDataM,
   output logic [`XLEN-1:0]    ReadDataW,
   output logic                DCacheStall,

   // inputs from TLB and PMA/P
   input logic                 FaultM,
   input logic                 DTLBMissM,

   // ahb side
   output logic [`PA_BITS-1:0] AHBPAdr,  // to ahb
   output logic                AHBRead,
   output logic                AHBWrite,
   input logic                 AHBAck,   // from ahb
   input logic [`XLEN-1:0]     HRDATA,   // from ahb
   output logic [`XLEN-1:0]    HWDATA,   // to ahb
   output logic [2:0]          AHBSize
   );

  localparam integer BLOCKLEN      = 256;
  localparam integer NUMLINES      = 512;
  localparam integer NUMWAYS       = 4;
  localparam integer NUMREPL_BITS  = 3;

  localparam integer BLOCKBYTELEN  = BLOCKLEN/8;
  localparam integer OFFSETLEN     = $clog2(BLOCKBYTELEN);
  localparam integer INDEXLEN      = $clog2(NUMLINES);
  localparam integer TAGLEN        = `PA_BITS - OFFSETLEN - INDEXLEN;
  localparam integer WORDSPERLINE  = BLOCKLEN/`XLEN;
  localparam integer LOGWPL        = $clog2(WORDSPERLINE); // bits needed to pick a word in a block
  localparam integer BYTEOFFSETLEN = $clog2(`XLEN/8);      // bits needed to pick a byte in a word

  // NOTE(review): the index field occupies address bits [INDEXLEN+OFFSETLEN-1:OFFSETLEN],
  // which is wider than the "lower 12 bits" mentioned on MemAdrE; confirm that indexing
  // with the E-stage address is intended for all of those bits.

  logic [1:0]              AdrSel;
  logic [`PA_BITS-1:0]     MemPAdrW;
  logic [INDEXLEN-1:0]     SRAMAdr;

  // Per-way write controls. The original declared WriteEnable/WriteWordEnable but used
  // SRAMWriteEnable/SRAMWordEnable; the word enable is now as wide as DCacheMem's port.
  logic [NUMWAYS-1:0]      SRAMWriteEnable;
  logic [WORDSPERLINE-1:0] SRAMWordEnable [NUMWAYS-1:0];
  logic [BLOCKLEN-1:0]     SRAMWriteData;
  logic [TAGLEN-1:0]       WriteTag;
  logic                    SetValid, ClearValid;
  logic                    SetDirty, ClearDirty;

  // Per-way read results (one array element per way).
  logic [BLOCKLEN-1:0]     ReadDataM [NUMWAYS-1:0];
  logic [BLOCKLEN-1:0]     ReadDataMaskedM [NUMWAYS-1:0];
  logic [TAGLEN-1:0]       ReadTag [NUMWAYS-1:0];
  logic [NUMWAYS-1:0]      Valid, Dirty, WayHit;
  logic                    Hit;

  // One replacement-state entry per line.
  logic [NUMREPL_BITS-1:0] ReplacementBits [NUMLINES-1:0];
  logic [NUMREPL_BITS-1:0] NewReplacement;

  logic [BLOCKLEN-1:0]     ReadDataSelectWayM;
  logic [`XLEN-1:0]        ReadDataSelectWayXLEN [(WORDSPERLINE)-1:0];
  logic [`XLEN-1:0]        WordReadDataM, FinalReadDataM;
  logic [`XLEN-1:0]        WriteDataW, FinalWriteDataW;
  logic [BLOCKLEN-1:0]     FinalWriteDataWordsW;

  // Signals that were used but never declared in the original.
  logic [2:0]              Funct3W;            // Funct3M delayed to the W stage
  logic [LOGWPL:0]         FetchCount;         // index of the word being fetched; one bit wider
                                               // than a word index so WORDSPERLINE is
                                               // representable - TODO confirm once the FSM exists
  logic [BLOCKLEN-1:0]     DCacheMemWriteData; // block assembled from HRDATA
  logic                    SelMemWriteData;    // 1: write fetched block, 0: write CPU data

  typedef enum {STATE_READY,
                STATE_MISS_FETCH_WDV,
                STATE_MISS_FETCH_DONE,
                STATE_MISS_WRITE_BACK,
                STATE_MISS_READ_SRAM,
                STATE_AMO_MISS_FETCH_WDV,
                STATE_AMO_MISS_FETCH_DONE,
                STATE_AMO_MISS_WRITE_BACK,
                STATE_AMO_MISS_READ_SRAM,
                STATE_AMO_MISS_UPDATE,
                STATE_AMO_MISS_WRITE,
                STATE_AMO_UPDATE,
                STATE_AMO_WRITE,
                STATE_SRAM_BUSY,
                STATE_PTW_READY,
                STATE_PTW_FETCH,
                STATE_UNCACHED} statetype;

  statetype CurrState, NextState;

  // W-stage copy of the physical address.
  flopen #(`PA_BITS) MemPAdrWReg(.clk(clk),
                                 .en(~StallW),
                                 .d(MemPAdrM),
                                 .q(MemPAdrW));

  // Select which pipeline stage supplies the SRAM index.
  mux3 #(INDEXLEN)
  AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
            .d1(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
            .d2(MemPAdrW[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
            .s(AdrSel),
            .y(SRAMAdr));

  // One memory per way, with its tag comparator and the AND half of the AND-OR way mux.
  genvar way;
  generate
    for (way = 0; way < NUMWAYS; way = way + 1) begin : ways
      DCacheMem #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN))
      MemWay(.clk(clk),
             .reset(reset),
             .Adr(SRAMAdr),
             .WAdr(MemPAdrW[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
             .WriteEnable(SRAMWriteEnable[way]),
             .WriteWordEnable(SRAMWordEnable[way]),
             .WriteData(SRAMWriteData),
             .WriteTag(WriteTag),
             .SetValid(SetValid),
             .ClearValid(ClearValid),
             .SetDirty(SetDirty),
             .ClearDirty(ClearDirty),
             .ReadData(ReadDataM[way]),
             .ReadTag(ReadTag[way]),
             .Valid(Valid[way]),
             .Dirty(Dirty[way]));

      // A way hits when it is valid and its stored tag equals the tag field of the
      // M-stage physical address. (The original compared against an undeclared MemAdrM
      // and assigned the whole WayHit vector from every loop iteration.)
      assign WayHit[way] = Valid[way] & (ReadTag[way] == MemPAdrM[`PA_BITS-1:OFFSETLEN+INDEXLEN]);

      // First part of the AO mux. The select must be the hit, not merely Valid, otherwise
      // every valid way would be ORed into the result.
      assign ReadDataMaskedM[way] = WayHit[way] ? ReadDataM[way] : '0;
    end
  endgenerate

  // Replacement state, one entry per line, updated at the W-stage index whenever any way
  // is written.
  always_ff @(posedge clk, posedge reset) begin
    if (reset) begin
      for (int line = 0; line < NUMLINES; line++)
        ReplacementBits[line] <= '0;
    end else if (|SRAMWriteEnable) begin
      ReplacementBits[MemPAdrW[INDEXLEN+OFFSETLEN-1:OFFSETLEN]] <= NewReplacement;
    end
  end

  assign Hit = |WayHit;

  // Second part of the AO mux: OR the masked blocks of all ways together. A reduction
  // operator cannot be applied to an unpacked array, so fold it explicitly.
  always_comb begin
    ReadDataSelectWayM = '0;
    for (int w = 0; w < NUMWAYS; w++)
      ReadDataSelectWayM = ReadDataSelectWayM | ReadDataMaskedM[w];
  end

  // Convert the read data bus ReadDataSelectWayM into sets of XLEN so we can
  // easily build a variable input mux.
  genvar index;
  generate
    for (index = 0; index < WORDSPERLINE; index++) begin : readwords
      assign ReadDataSelectWayXLEN[index] = ReadDataSelectWayM[((index+1)*`XLEN)-1 : (index*`XLEN)];
    end
  endgenerate

  // Variable input mux: the word-in-block field sits directly above the byte-in-word
  // field and directly below the index field of the address.
  assign WordReadDataM = ReadDataSelectWayXLEN[MemPAdrM[OFFSETLEN-1 : BYTEOFFSETLEN]];

  // finally swr
  // NOTE(review): the HADDRD slice [`XLEN/8-1:0] is kept from the original; a byte offset
  // within a word needs only $clog2(`XLEN/8) bits - confirm against subwordread's port.
  subwordread subwordread(.HRDATA(WordReadDataM),
                          .HADDRD(MemPAdrM[`XLEN/8-1:0]),
                          .HSIZED(Funct3M),
                          .HRDATAMasked(FinalReadDataM));

  flopen #(`XLEN) ReadDataWReg(.clk(clk),
                               .en(~StallW),
                               .d(FinalReadDataM),
                               .q(ReadDataW));

  // write path
  flopen #(`XLEN) WriteDataWReg(.clk(clk),
                                .en(~StallW),
                                .d(WriteDataM),
                                .q(WriteDataW));

  // Funct3W was referenced but never produced; register Funct3M alongside the other
  // W-stage registers.
  flopen #(3) Funct3WReg(.clk(clk),
                         .en(~StallW),
                         .d(Funct3M),
                         .q(Funct3W));

  // NOTE(review): HADDRD still uses the M-stage address while the data and size are
  // W-stage values; confirm whether MemPAdrW is intended here.
  subwordwrite subwordwrite(.HRDATA(ReadDataW),
                            .HADDRD(MemPAdrM[`XLEN/8-1:0]),
                            .HSIZED(Funct3W),
                            .HWDATAIN(WriteDataW),
                            .HWDATA(FinalWriteDataW));

  // register the fetch data from the next level of memory.
  generate
    for (index = 0; index < WORDSPERLINE; index++) begin : fetchbuffer
      flopen #(`XLEN) fb(.clk(clk),
                         .en(AHBAck & (index == FetchCount)),
                         .d(HRDATA),
                         .q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN]));
    end
  endgenerate

  // mux between the CPU's write and the cache fetch.
  // The CPU word is replicated into every word slot; the per-word write enables decide
  // which slot is actually written.
  generate
    for (index = 0; index < WORDSPERLINE; index++) begin : writewords
      assign FinalWriteDataWordsW[((index+1)*`XLEN)-1 : (index*`XLEN)] = FinalWriteDataW;
    end
  endgenerate

  mux2 #(BLOCKLEN) WriteDataMux(.d0(FinalWriteDataWordsW),
                                .d1(DCacheMemWriteData),
                                .s(SelMemWriteData),
                                .y(SRAMWriteData));

endmodule // dcache

View File

@ -1,184 +0,0 @@
///////////////////////////////////////////
// dcache.sv
//
// Written: jaallen@g.hmc.edu 2021-04-15
// Modified:
//
// Purpose: Cache memory for the dmem so it can access memory less often, saving cycles
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// dcache (previous implementation): wraps a direct-mapped cache memory
// (wtdirectmappedmem) and the controller that refills it.
//
// Signal names are significant here: both sub-modules are connected with `.*`, which binds
// ports to identically named signals in this scope.
//
// NOTE(review): DCacheMemUpperPAdr and DCacheMemLowerAdr have no driver in this module,
// and the inputs UpperPAdrM, LowerVAdrM, DCacheWriteDataM, DCacheReadM and DCacheWriteM
// are not referenced here; confirm whether that wiring was intended to be added.
module dcache(
  // Basic pipeline stuff
  input  logic                 clk,
  input  logic                 reset,
  input  logic                 StallW,
  input  logic                 FlushW,
  // Upper bits of physical address
  input  logic [`PA_BITS-1:12] UpperPAdrM,
  // Lower 12 bits of virtual address, since it's faster this way
  input  logic [11:0]          LowerVAdrM,
  // Write to the dcache
  input  logic [`XLEN-1:0]     DCacheWriteDataM,
  input  logic                 DCacheReadM,
  input  logic                 DCacheWriteM,
  // Data read in from the ebu unit
  input  logic [`XLEN-1:0]     ReadDataW,
  input  logic                 MemAckW,
  // Access requested from the ebu unit
  output logic [`PA_BITS-1:0]  MemPAdrM,
  output logic                 MemReadM,
  output logic                 MemWriteM,
  // High if the dcache is requesting a stall
  output logic                 DCacheStallW,
  // The data that was requested from the cache
  output logic [`XLEN-1:0]     DCacheReadW
);

  // Configuration parameters
  // TODO Move these to a config file
  localparam integer DCACHELINESIZE = 256;
  localparam integer DCACHENUMLINES = 512;

  // Cache memory inputs
  logic                      FlushMem;
  logic [`PA_BITS-1:12]      DCacheMemUpperPAdr;
  logic [11:0]               DCacheMemLowerAdr;
  logic                      DCacheMemWriteEnable;
  logic [DCACHELINESIZE-1:0] DCacheMemWriteData;
  logic [`XLEN-1:0]          DCacheMemWritePAdr;
  logic                      EndFetchState;

  // Cache memory outputs
  logic [`XLEN-1:0]          DCacheMemReadData;
  logic                      DCacheMemReadValid;

  // The memory's flush input is held low.
  assign FlushMem = 1'b0;

  // Cache storage. The word-write port is tied off with constants; lines are only loaded
  // through the Load* ports, which are driven by the controller's DCacheMemWrite* signals.
  wtdirectmappedmem #(.LINESIZE(DCACHELINESIZE),
                      .NUMLINES(DCACHENUMLINES),
                      .WORDSIZE(`XLEN))
  cachemem(
    .*,
    // the memory's stall input is the pipeline's StallW
    .stall(StallW),
    .flush(FlushMem),
    // read port
    .ReadUpperPAdr(DCacheMemUpperPAdr),
    .ReadLowerAdr(DCacheMemLowerAdr),
    .DataWord(DCacheMemReadData),
    .DataValid(DCacheMemReadValid),
    // line load port
    .LoadEnable(DCacheMemWriteEnable),
    .LoadLine(DCacheMemWriteData),
    .LoadPAdr(DCacheMemWritePAdr),
    // word write port (unused)
    .WriteEnable(0),
    .WriteWord(0),
    .WritePAdr(0),
    .WriteSize(2'b10)
  );

  // Refill controller; every port binds by name.
  dcachecontroller #(.LINESIZE(DCACHELINESIZE)) controller(.*);

endmodule
// dcachecontroller: refill controller for the previous (direct-mapped) data cache.
//
// On a hit the word read from the cache memory is passed straight through to DCacheReadW.
// When the cache memory reports the read as not valid, the controller enters a fetch
// state, requests the words of the line one at a time from main memory, collects them into
// DCacheMemWriteData, and then pulses DCacheMemWriteEnable to load the line. The pipeline
// is stalled for the whole sequence.
//
// Parameters:
//   LINESIZE - cache line size in bits
//
// Fixes relative to the original:
//   * DCacheReadW is `XLEN wide (it was [31:0], truncating DCacheMemReadData when
//     `XLEN is larger than 32 and mismatching the `XLEN-wide signal it connects to).
//   * MemWriteM is driven low; it previously had no driver. This controller never
//     issues memory writes.
//
// NOTE(review): DCacheMemWritePAdr is `XLEN bits but is assigned a `PA_BITS-wide address;
// the width is left unchanged because it must match the cache memory's port.
module dcachecontroller #(parameter LINESIZE = 256) (
  // Inputs from pipeline
  input  logic                 clk, reset,
  input  logic                 StallW,
  input  logic                 FlushW,

  // Input the address to read
  // The upper bits of the physical address
  input  logic [`PA_BITS-1:12] DCacheMemUpperPAdr,
  // The lower bits of the virtual address
  input  logic [11:0]          DCacheMemLowerAdr,

  // Signals to/from cache memory
  // The read coming out of it
  input  logic [`XLEN-1:0]     DCacheMemReadData,
  input  logic                 DCacheMemReadValid,
  // Load data into the cache
  output logic                 DCacheMemWriteEnable,
  output logic [LINESIZE-1:0]  DCacheMemWriteData,
  output logic [`XLEN-1:0]     DCacheMemWritePAdr,

  // The read that was requested
  output logic [`XLEN-1:0]     DCacheReadW,

  // Outputs to pipeline control stuff
  output logic                 DCacheStallW, EndFetchState,

  // Signals to/from ahblite interface
  // A read containing the requested data
  input  logic [`XLEN-1:0]     ReadDataW,
  input  logic                 MemAckW,
  // The read we request from main memory
  output logic [`PA_BITS-1:0]  MemPAdrM,
  output logic                 MemReadM, MemWriteM
);

  // Handle happy path (data in cache)
  always_comb begin
    DCacheReadW = DCacheMemReadData;
  end

  // Handle cache misses
  localparam integer WORDSPERLINE = LINESIZE/`XLEN;
  localparam integer LOGWPL       = $clog2(WORDSPERLINE);
  localparam integer OFFSETWIDTH  = $clog2(LINESIZE/8);

  logic                FetchState, BeginFetchState;
  // One bit wider than a word index so the count can reach WORDSPERLINE.
  logic [LOGWPL:0]     FetchWordNum, NextFetchWordNum;
  logic [`PA_BITS-1:0] LineAlignedPCPF;

  // FetchState is set by BeginFetchState and held until EndFetchState.
  flopr #(1) FetchStateFlop(clk, reset, BeginFetchState | (FetchState & ~EndFetchState), FetchState);
  flopr #(LOGWPL+1) FetchWordNumFlop(clk, reset, NextFetchWordNum, FetchWordNum);

  // Line buffer: word i is captured from ReadDataW while FetchWordNum == i.
  genvar i;
  generate
    for (i = 0; i < WORDSPERLINE; i++) begin : sb
      flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), ReadDataW, DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]);
    end
  endgenerate

  // Enter the fetch state when the cache read is not valid and no fetch is in progress
  always_comb begin
    BeginFetchState = ~DCacheMemReadValid & ~FetchState & (FetchWordNum == 0);
  end

  // Exit the fetch state the cycle after the line has been written into the cache
  flopr #(1) EndFetchStateFlop(clk, reset, DCacheMemWriteEnable, EndFetchState);

  // Machinery to request the correct addresses from main memory
  always_comb begin
    MemReadM         = FetchState & ~EndFetchState & ~DCacheMemWriteEnable;
    MemWriteM        = 1'b0; // no write requests are generated by this controller
    LineAlignedPCPF  = {DCacheMemUpperPAdr, DCacheMemLowerAdr[11:OFFSETWIDTH], {OFFSETWIDTH{1'b0}}};
    MemPAdrM         = LineAlignedPCPF + FetchWordNum*(`XLEN/8);
    // Advance by one word per acknowledged transfer; return to zero outside the fetch state
    NextFetchWordNum = FetchState ? FetchWordNum+MemAckW : {(LOGWPL+1){1'b0}};
  end

  // Write to cache memory once FetchWordNum has reached WORDSPERLINE (MSB set, rest zero)
  always_comb begin
    DCacheMemWritePAdr   = LineAlignedPCPF;
    DCacheMemWriteEnable = (FetchWordNum == {1'b1, {LOGWPL{1'b0}}}) & FetchState & ~EndFetchState;
  end

  // Stall the pipeline while loading a new line from memory
  always_comb begin
    DCacheStallW = FetchState | ~DCacheMemReadValid;
  end

endmodule