// Mirror of https://github.com/openhwgroup/cvw, synced 2025-02-11 06:05:49 +00:00 (270 lines, 9.0 KiB, SystemVerilog)
///////////////////////////////////////////
// icache.sv
//
// Written: jaallen@g.hmc.edu 2021-03-02
// Modified:
//
// Purpose: Cache instructions for the ifu so it can access memory less often, saving cycles
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////

`include "wally-config.vh"
|
|
|
|
// icache: instruction cache sitting between the instruction fetch unit and the bus.
//
// Structure visible in this module:
//   * NUMWAYS instances of `cacheway` hold the tag/data arrays; their masked read
//     lines are OR-reduced into ReadLineF (AND-OR mux).
//   * A 32-bit window is extracted from the selected line at 16-bit granularity.
//   * When the fetch address is the last halfword of a block (`spill`), the low
//     halfword is saved in SpillInstrReg and later merged with the first halfword
//     read at PCPSpillF (PCPF + 2).
//   * On a refill, words arriving on IfuBusHRDATA are collected in a store buffer
//     (one XLEN-wide register per word of the line) before being written to a way.
//   * `icachefsm` is connected to the control signals (stall, bus fetch, counter
//     enable/reset, address select, write enable).
//     NOTE(review): signal directions on the FSM are not visible here - confirm in icachefsm.
module icache
  (
   // Basic pipeline stuff
   input logic                 clk, reset,
   input logic                 CPUBusy,
   input logic [`PA_BITS-1:0]  PCNextF,
   input logic [`PA_BITS-1:0]  PCPF,
   input logic [`XLEN-1:0]     PCF,

   input logic                 ExceptionM, PendingInterruptM,

   // Data read in from the ebu unit
   (* mark_debug = "true" *) input logic [`XLEN-1:0] IfuBusHRDATA,
   (* mark_debug = "true" *) input logic ICacheBusAck,
   // Read requested from the ebu unit
   (* mark_debug = "true" *) output logic [`PA_BITS-1:0] ICacheBusAdr,
   (* mark_debug = "true" *) output logic IfuBusFetch,
   // High if the instruction currently in the fetch stage is compressed
   output logic                CompressedF,
   // High if the icache is requesting a stall
   output logic                ICacheStallF,
   input logic                 CacheableF,
   input logic                 ITLBMissF,
   input logic                 ITLBWriteF,
   input logic                 InvalidateICacheM,

   // The raw (not decompressed) instruction that was requested
   // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
   (* mark_debug = "true" *) output logic [31:0] FinalInstrRawF
   );

  // Configuration parameters
  localparam integer BLOCKLEN = `ICACHE_BLOCKLENINBITS;
  localparam integer NUMLINES = `ICACHE_WAYSIZEINBYTES*8/`ICACHE_BLOCKLENINBITS;
  localparam integer BLOCKBYTELEN = BLOCKLEN/8;

  // Address field widths: | tag | index | byte offset |
  localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN);
  localparam integer INDEXLEN = $clog2(NUMLINES);
  localparam integer TAGLEN = `PA_BITS - OFFSETLEN - INDEXLEN;

  // Number of XLEN-wide bus words per cache line and the counter width to index them
  localparam WORDSPERLINE = BLOCKLEN/`XLEN;
  localparam LOGWPL = $clog2(WORDSPERLINE);

  // FetchCount value at which the last word of a line is being fetched
  localparam FetchCountThreshold = WORDSPERLINE - 1;

  localparam integer PA_WIDTH = `PA_BITS - 2;
  localparam integer NUMWAYS = `ICACHE_NUMWAYS;

  // Input signals to cache memory
  logic                  ICacheMemWriteEnable;
  logic [BLOCKLEN-1:0]   ICacheMemWriteData;
  logic [`PA_BITS-1:0]   FinalPCPF;
  // Output signals from cache memory
  logic [31:0]           ICacheMemReadData;
  logic                  ICacheReadEn;
  logic [BLOCKLEN-1:0]   ReadLineF;

  logic [15:0]           SpillDataBlock0;
  logic                  spill;
  logic                  spillSave;

  logic                  FetchCountFlag;
  logic                  CntEn;

  logic [1:1]            SelAdr_q;

  logic [LOGWPL-1:0]     FetchCount, NextFetchCount;

  logic [`PA_BITS-1:0]   PCPSpillF;

  logic                  CntReset;
  logic [1:0]            SelAdr;
  logic [INDEXLEN-1:0]   RAdr;
  logic [NUMWAYS-1:0]    VictimWay;
  logic                  LRUWriteEn;
  logic [NUMWAYS-1:0]    WayHit;
  logic                  hit;

  logic [BLOCKLEN-1:0]   ReadDataLineWayMasked [NUMWAYS-1:0];

  logic [31:0]           ReadLineSetsF [`ICACHE_BLOCKLENINBITS/16-1:0];

  logic [`PA_BITS-1:0]   BasePAdrMaskedF;
  logic [OFFSETLEN-1:0]  BasePAdrOffsetF;

  logic [NUMWAYS-1:0]    SRAMWayWriteEnable;

  // on spill we want to get the first 2 bytes of the next cache block.
  // the spill only occurs if the PCPF mod BlockByteLength == -2.  Therefore we can
  // simply add 2 to land on the next cache block.
  // PA_WIDTH + 2 == `PA_BITS, so the addend is a PA_BITS-wide constant 2.
  assign PCPSpillF = PCPF + {{PA_WIDTH{1'b0}}, 2'b10};

  // Select the set index presented to the ways from the next PC, the current PC,
  // or the spill address, under control of SelAdr.
  // NOTE(review): mux3 presumably maps s=0/1/2 to d0/d1/d2 - confirm in mux3.
  mux3 #(INDEXLEN)
  AdrSelMux(.d0(PCNextF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
            .d1(PCF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
            .d2(PCPSpillF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
            .s(SelAdr),
            .y(RAdr));

  // One cacheway instance per way.  Dirty tracking is parameterized off (DIRTY_BITS = 0)
  // and the dirty/evict/flush/clear-valid inputs are tied low; whole lines are written
  // (all word enables set) into the way selected by SRAMWayWriteEnable.
  cacheway #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN),
             .OFFSETLEN(OFFSETLEN), .INDEXLEN(INDEXLEN), .DIRTY_BITS(0))
  MemWay[NUMWAYS-1:0](.clk, .reset, .RAdr,
                      .PAdr(FinalPCPF),
                      .WriteEnable(SRAMWayWriteEnable),
                      .VDWriteEnable(1'b0),
                      .WriteWordEnable({{(BLOCKLEN/`XLEN){1'b1}}}),
                      .TagWriteEnable(SRAMWayWriteEnable),
                      .WriteData(ICacheMemWriteData),
                      .SetValid(ICacheMemWriteEnable),
                      .ClearValid(1'b0), .SetDirty(1'b0), .ClearDirty(1'b0), .SelEvict(1'b0),
                      .VictimWay,
                      .FlushWay(1'b0), .SelFlush(1'b0),
                      .ReadDataLineWayMasked, .WayHit,
                      .VictimDirtyWay(), .VictimTagWay(),
                      .InvalidateAll(InvalidateICacheM));

  // Victim selection: a replacement policy block when there is more than one way,
  // otherwise the single way is always the victim.
  generate
    if(NUMWAYS > 1) begin
      cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES)
      cachereplacementpolicy(.clk, .reset,
                             .WayHit,
                             .VictimWay,
                             .LsuPAdrM(FinalPCPF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]),
                             .RAdr,
                             .LRUWriteEn);
    end else begin
      assign VictimWay = 1'b1; // one hot.
    end
  endgenerate

  // A hit in any way is a cache hit.
  assign hit = | WayHit;

  // ReadDataLineWayMasked is a 2d array of cache block len by number of ways.
  // Need to OR together each way in a bitwise manner.
  // Final part of the AO Mux.  First is the AND in the cacheway.
  or_rows #(NUMWAYS, BLOCKLEN) ReadDataAOMux(.a(ReadDataLineWayMasked), .y(ReadLineF));

  // Build one 32-bit window per 16-bit position in the line so that an instruction
  // starting on any halfword can be selected.  The last window has no following
  // halfword inside the line, so its upper 16 bits are zero.
  genvar index;
  generate
    for(index = 0; index < BLOCKLEN / 16 - 1; index++) begin
      assign ReadLineSetsF[index] = ReadLineF[((index+1)*16)+16-1 : (index*16)];
    end
    assign ReadLineSetsF[BLOCKLEN/16-1] = {16'b0, ReadLineF[BLOCKLEN-1:BLOCKLEN-16]};
  endgenerate

  // Halfword index within the line (address bits above bit 0) selects the window.
  assign ICacheMemReadData = ReadLineSetsF[FinalPCPF[$clog2(BLOCKLEN / 32) + 1 : 1]];

  // spills require storing the first cache block so it can be merged
  // with the second
  // can optimize size, for now just make it the size of the data
  // leaving the cache memory.
  flopenr #(16) SpillInstrReg(.clk(clk),
                              .en(spillSave),
                              .reset(reset),
                              .d(ICacheMemReadData[15:0]),
                              .q(SpillDataBlock0));

  // On a spill the saved halfword supplies the low 16 bits and the current read
  // supplies the high 16 bits; otherwise the current read is used directly.
  assign FinalInstrRawF = spill ? {ICacheMemReadData[15:0], SpillDataBlock0} : ICacheMemReadData;

  // Detect if the instruction is compressed
  assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
  // spill is flagged when every halfword-index bit of PCF is 1, i.e. the fetch
  // address is the last halfword of a cache block.
  assign spill = &PCF[$clog2(BLOCKLEN/32)+1:1];

  // to compute the fetch address we need to add the bit shifted
  // counter output to the address.
  assign FetchCountFlag = (FetchCount == FetchCountThreshold[LOGWPL-1:0]);

  // Word counter for line refills; cleared by reset or CntReset, advanced when CntEn.
  // NOTE(review): assumes flopenr is an enabled flop with reset - confirm in flopenr.
  flopenr #(LOGWPL)
  FetchCountReg(.clk(clk),
                .reset(reset | CntReset),
                .en(CntEn),
                .d(NextFetchCount),
                .q(FetchCount));

  assign NextFetchCount = FetchCount + 1'b1;

  // store read data from memory interface before writing into SRAM.
  // Slice i of ICacheMemWriteData is enabled when the bus acknowledges and
  // FetchCount equals i.
  genvar i;
  generate
    for (i = 0; i < WORDSPERLINE; i++) begin:storebuffer
      flopenr #(`XLEN) sb(.clk(clk),
                          .reset(reset),
                          .en(ICacheBusAck & (i == FetchCount)),
                          .d(IfuBusHRDATA),
                          .q(ICacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]));
    end
  endgenerate

  // this mux needs to be delayed 1 cycle as it occurs 1 pipeline stage later.
  // *** read enable may not be necessary.
  flopenr #(1) SelAdrReg(.clk(clk),
                         .reset(reset),
                         .en(ICacheReadEn),
                         .d(SelAdr[1]),
                         .q(SelAdr_q[1]));

  // Registered upper select bit chooses between the spill address and PCPF.
  assign FinalPCPF = SelAdr_q[1] ? PCPSpillF : PCPF;

  // if not cacheable the offset bits needs to be sent to the EBU.
  // if cacheable the offset bits are discarded.  $ FSM will fetch the whole block.
  assign BasePAdrOffsetF = CacheableF ? {{OFFSETLEN}{1'b0}} : FinalPCPF[OFFSETLEN-1:0];
  assign BasePAdrMaskedF = {FinalPCPF[`PA_BITS-1:OFFSETLEN], BasePAdrOffsetF};

  // Bus address = base address + FetchCount scaled to a byte offset (XLEN/8 bytes per word).
  // The replication count is parenthesized so it does not depend on how `PA_BITS expands.
  assign ICacheBusAdr = ({{(`PA_BITS-LOGWPL){1'b0}}, FetchCount} << $clog2(`XLEN/8)) + BasePAdrMaskedF;

  // Only the victim way is written, and only while the cache memory write enable is set.
  assign SRAMWayWriteEnable = ICacheMemWriteEnable ? VictimWay : '0;

  // Control FSM; all connections are by matching signal name.
  icachefsm controller(.clk,
                       .reset,
                       .CPUBusy,
                       .ICacheReadEn,
                       .ICacheMemWriteEnable,
                       .ICacheStallF,
                       .ITLBMissF,
                       .ITLBWriteF,
                       .ExceptionM,
                       .PendingInterruptM,
                       .ICacheBusAck,
                       .IfuBusFetch,
                       .hit,
                       .FetchCountFlag,
                       .spill,
                       .spillSave,
                       .CntEn,
                       .CntReset,
                       .SelAdr,
                       .LRUWriteEn);

endmodule
|
|
|