cvw/wally-pipelined/src/cache/icache.sv
2021-08-30 10:49:24 -05:00

303 lines
9.9 KiB
Systemverilog

///////////////////////////////////////////
// icache.sv
//
// Written: jaallen@g.hmc.edu 2021-03-02
// Modified:
//
// Purpose: Cache instructions for the ifu so it can access memory less often, saving cycles
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module icache
(
// Basic pipeline stuff
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
input logic [`PA_BITS-1:0] PCNextF,
input logic [`PA_BITS-1:0] PCPF,
// Data read in from the ebu unit
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// Read requested from the ebu unit
output logic [`PA_BITS-1:0] InstrPAdrF,
output logic InstrReadF,
// High if the instruction currently in the fetch stage is compressed
output logic CompressedF,
// High if the icache is requesting a stall
output logic ICacheStallF,
input logic ITLBMissF,
input logic ITLBWriteF,
input logic WalkerInstrPageFaultF,
// The raw (not decompressed) instruction that was requested
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] FinalInstrRawF
);
// Configuration parameters
localparam integer BLOCKLEN = `ICACHE_BLOCKLENINBITS;
localparam integer NUMLINES = `ICACHE_WAYSIZEINBYTES*8/`ICACHE_BLOCKLENINBITS;
localparam integer BLOCKBYTELEN = BLOCKLEN/8;
localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN);
localparam integer INDEXLEN = $clog2(NUMLINES);
localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN;
localparam WORDSPERLINE = BLOCKLEN/`XLEN;
localparam LOGWPL = $clog2(WORDSPERLINE);
localparam FetchCountThreshold = WORDSPERLINE - 1;
localparam BlockByteLength = BLOCKLEN / 8;
localparam OFFSETWIDTH = $clog2(BlockByteLength);
localparam integer PA_WIDTH = `PA_BITS - 2;
localparam integer NUMWAYS = 4;
// Input signals to cache memory
logic FlushMem;
logic ICacheMemWriteEnable;
logic [BLOCKLEN-1:0] ICacheMemWriteData;
logic [`PA_BITS-1:0] PCTagF, PCNextIndexF;
// Output signals from cache memory
logic [31:0] ICacheMemReadData;
logic ICacheMemReadValid;
logic ICacheReadEn;
logic [BLOCKLEN-1:0] ReadLineF;
logic [15:0] SpillDataBlock0;
logic spill;
logic spillSave;
logic FetchCountFlag;
logic CntEn;
logic [1:0] SelAdr_q;
logic [LOGWPL-1:0] FetchCount, NextFetchCount;
logic [`PA_BITS-1:0] PCPSpillF;
logic CntReset;
logic [1:0] SelAdr;
logic SavePC;
logic [INDEXLEN-1:0] RAdr;
logic [NUMWAYS-1:0] VictimWay;
logic LRUWriteEn;
logic [NUMWAYS-1:0] WayHit;
logic hit;
logic [BLOCKLEN-1:0] ReadDataBlockWayMasked [NUMWAYS-1:0];
logic CacheableF;
logic [`PA_BITS-1:0] BasePAdrF, BasePAdrMaskedF;
logic [OFFSETLEN-1:0] BasePAdrOffsetF;
// on spill we want to get the first 2 bytes of the next cache block.
// the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can
// simply add 2 to land on the next cache block.
assign PCPSpillF = PCPF + {{{PA_WIDTH}{1'b0}}, 2'b10}; // *** modelsim does not allow the use of PA_BITS for literal width.
mux3 #(INDEXLEN)
AdrSelMux(.d0(PCNextF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
.d1(PCPF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
.d2(PCPSpillF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
.s(SelAdr),
.y(RAdr));
cacheway #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN), .OFFSETLEN(OFFSETLEN), .INDEXLEN(INDEXLEN),
.DIRTY_BITS(0))
icachemem[NUMWAYS-1:0](.clk,
.reset,
.RAdr(RAdr),
.PAdr(PCTagF),
.WriteEnable(ICacheMemWriteEnable), // *** connect
.WriteWordEnable('1),
.TagWriteEnable(ICacheMemWriteEnable), // *** connect
.WriteData(ICacheMemWriteData),
.SetValid(ICacheMemWriteEnable),
.ClearValid(1'b0),
.SetDirty(1'b0),
.ClearDirty(1'b0),
.SelEvict(1'b0),
.VictimWay,
.ReadDataBlockWayMasked,
.WayHit,
.VictimDirtyWay(),
.VictimTagWay()
);
generate
if(NUMWAYS > 1) begin
cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES)
cachereplacementpolicy(.clk, .reset,
.WayHit,
.VictimWay,
.MemPAdrM(PCTagF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
.RAdr,
.LRUWriteEn); // *** connect
end else begin
assign VictimWay = 1'b1; // one hot.
end
endgenerate
assign hit = | WayHit;
// ReadDataBlockWayMasked is a 2d array of cache block len by number of ways.
// Need to OR together each way in a bitwise manner.
// Final part of the AO Mux. First is the AND in the cacheway.
or_rows #(NUMWAYS, BLOCKLEN) ReadDataAOMux(.a(ReadDataBlockWayMasked), .y(ReadLineF));
always_comb begin
case (PCTagF[4:1])
0: ICacheMemReadData = ReadLineF[31:0];
1: ICacheMemReadData = ReadLineF[47:16];
2: ICacheMemReadData = ReadLineF[63:32];
3: ICacheMemReadData = ReadLineF[79:48];
4: ICacheMemReadData = ReadLineF[95:64];
5: ICacheMemReadData = ReadLineF[111:80];
6: ICacheMemReadData = ReadLineF[127:96];
7: ICacheMemReadData = ReadLineF[143:112];
8: ICacheMemReadData = ReadLineF[159:128];
9: ICacheMemReadData = ReadLineF[175:144];
10: ICacheMemReadData = ReadLineF[191:160];
11: ICacheMemReadData = ReadLineF[207:176];
12: ICacheMemReadData = ReadLineF[223:192];
13: ICacheMemReadData = ReadLineF[239:208];
14: ICacheMemReadData = ReadLineF[255:224];
15: ICacheMemReadData = {16'b0, ReadLineF[255:240]};
endcase
end
// spills require storing the first cache block so it can merged
// with the second
// can optimize size, for now just make it the size of the data
// leaving the cache memory.
flopenr #(16) SpillInstrReg(.clk(clk),
.en(spillSave),
.reset(reset),
.d(ICacheMemReadData[15:0]),
.q(SpillDataBlock0));
assign FinalInstrRawF = spill ? {ICacheMemReadData[15:0], SpillDataBlock0} : ICacheMemReadData;
// Detect if the instruction is compressed
assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
assign spill = PCPF[4:1] == 4'b1111 ? 1'b1 : 1'b0;
// to compute the fetch address we need to add the bit shifted
// counter output to the address.
assign FetchCountFlag = (FetchCount == FetchCountThreshold[LOGWPL-1:0]);
flopenr #(LOGWPL)
FetchCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextFetchCount),
.q(FetchCount));
assign NextFetchCount = FetchCount + 1'b1;
// store read data from memory interface before writing into SRAM.
genvar i;
generate
for (i = 0; i < WORDSPERLINE; i++) begin:storebuffer
flopenr #(`XLEN) sb(.clk(clk),
.reset(reset),
.en(InstrAckF & (i == FetchCount)),
.d(InstrInF),
.q(ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]));
end
endgenerate
// this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later.
// *** read enable may not be necessary.
flopenr #(2) SelAdrReg(.clk(clk),
.reset(reset),
.en(ICacheReadEn),
.d(SelAdr),
.q(SelAdr_q));
assign PCTagF = SelAdr_q[1] ? PCPSpillF : PCPF;
// unlike the dcache the victim is never dirty so no eviction is necessary.
/* -----\/----- EXCLUDED -----\/-----
mux2 #(`PA_BITS) BaseAdrMux(.d0(PCTagF),
.d1({VictimTag, PCTagF[INDEXLEN+OFFSETLEN-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
.s(SelEvict),
.y(BasePAdrF));
-----/\----- EXCLUDED -----/\----- */
assign BasePAdrF = PCTagF;
// if not cacheable the offset bits needs to be sent to the EBU.
// if cacheable the offset bits are discarded. $ FSM will fetch the whole block.
assign CacheableF = 1'b1; // *** BUG needs to be an input from MMU.
assign BasePAdrOffsetF = CacheableF ? {{OFFSETLEN}{1'b0}} : BasePAdrF[OFFSETLEN-1:0];
assign BasePAdrMaskedF = {BasePAdrF[`PA_BITS-1:OFFSETLEN], BasePAdrOffsetF};
assign InstrPAdrF = ({{`PA_BITS-LOGWPL{1'b0}}, FetchCount} << $clog2(`XLEN/8)) + BasePAdrMaskedF;
// truncate the offset from PCPF for memory address generation
icachefsm #(.BLOCKLEN(BLOCKLEN))
controller(.clk,
.reset,
.StallF,
.ICacheReadEn,
.ICacheMemWriteEnable,
.ICacheStallF,
.ITLBMissF,
.ITLBWriteF,
.WalkerInstrPageFaultF,
.InstrAckF,
.InstrReadF,
.hit,
.FetchCountFlag,
.spill,
.spillSave,
.CntEn,
.CntReset,
.SelAdr,
.SavePC
);
// For now, assume no writes to executable memory
assign FlushMem = 1'b0;
endmodule