cvw/pipelined/src/lsu/lsu.sv

427 lines
16 KiB
Systemverilog
Raw Normal View History

///////////////////////////////////////////
// lsu.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Load/Store Unit
// Top level of the memory-stage hart logic
// Contains data cache, DTLB, subword read/write datapath, interface to external bus
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module lsu
(
input logic clk, reset,
input logic StallM, FlushM, StallW, FlushW,
output logic LSUStall,
// Memory Stage
// connected to cpu (controls)
input logic [1:0] MemRWM,
input logic [2:0] Funct3M,
input logic [6:0] Funct7M,
input logic [1:0] AtomicM,
input logic TrapM,
input logic FlushDCacheM,
output logic CommittedM,
output logic SquashSCW,
output logic DCacheMiss,
output logic DCacheAccess,
// address and write data
input logic [`XLEN-1:0] IEUAdrE,
(* mark_debug = "true" *)output logic [`XLEN-1:0] IEUAdrM,
input logic [`XLEN-1:0] WriteDataM,
output logic [`XLEN-1:0] ReadDataM,
// cpu privilege
input logic [1:0] PrivilegeModeW,
input logic DTLBFlushM,
// faults
output logic DTLBLoadPageFaultM, DTLBStorePageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
// cpu hazard unit (trap)
output logic StoreMisalignedFaultM, StoreAccessFaultM,
// connect to ahb
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] LSUBusAdr,
(* mark_debug = "true" *) output logic LSUBusRead,
(* mark_debug = "true" *) output logic LSUBusWrite,
(* mark_debug = "true" *) input logic LSUBusAck,
(* mark_debug = "true" *) input logic [`XLEN-1:0] LSUBusHRDATA,
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUBusHWDATA,
(* mark_debug = "true" *) output logic [2:0] LSUBusSize,
// mmu management
// page table walker
input logic [`XLEN-1:0] SATP_REGW, // from csr
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
input logic [1:0] STATUS_MPP,
input logic [`XLEN-1:0] PCF,
input logic ITLBMissF,
output logic [`XLEN-1:0] PTE,
output logic [1:0] PageType,
output logic ITLBWriteF,
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // *** this one especially has a large note attached to it in pmpchecker.
);
logic DTLBPageFaultM;
logic [`PA_BITS-1:0] LSUPAdrM; // from mmu to dcache
logic [`XLEN+1:0] IEUAdrExtM;
logic DTLBMissM;
logic DTLBWriteM;
logic [1:0] LSURWM;
logic [1:0] PreLSURWM;
logic [2:0] LSUFunct3M;
logic [6:0] LSUFunct7M;
logic [1:0] LSUAtomicM;
(* mark_debug = "true" *) logic [`PA_BITS-1:0] PreLSUPAdrM, LocalLSUBusAdr;
logic [11:0] PreLSUAdrE, LSUAdrE;
2021-12-20 04:21:03 +00:00
logic CPUBusy;
logic MemReadM;
logic DCacheStall;
logic CacheableM;
2021-12-20 04:24:07 +00:00
logic SelHPTW;
2021-12-20 04:00:28 +00:00
logic BusStall;
logic InterlockStall;
logic IgnoreRequest;
logic BusCommittedM, DCacheCommittedM;
2022-01-14 23:02:28 +00:00
// Execute Stage Logic
// Execute-Memory Stage Registers (and Cache fires on this edge)
flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM);
2022-01-14 23:02:28 +00:00
// Memory Stage Logic
assign IEUAdrExtM = {2'b00, IEUAdrM};
2022-01-14 23:02:28 +00:00
// HPTW and Interlock FSM (only needed if VM supported)
2022-01-05 16:25:08 +00:00
if(`MEM_VIRTMEM) begin : MEM_VIRTMEM
logic AnyCPUReqM;
logic [`PA_BITS-1:0] HPTWAdr;
logic HPTWRead;
logic [2:0] HPTWSize;
logic SelReplayCPURequest;
assign AnyCPUReqM = (|MemRWM) | (|AtomicM);
interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF,
.DTLBMissM, .DTLBWriteM, .TrapM, .DCacheStall,
2022-01-05 16:25:08 +00:00
.InterlockStall, .SelReplayCPURequest, .SelHPTW,
.IgnoreRequest);
hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM,
.ITLBMissF(ITLBMissF & ~TrapM),
.DTLBMissM(DTLBMissM & ~TrapM),
2022-01-13 17:04:48 +00:00
.PTE, .PageType, .ITLBWriteF, .DTLBWriteM,
2022-01-05 16:25:08 +00:00
.HPTWReadPTE(ReadDataM),
2022-01-13 17:04:48 +00:00
.DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize);
2022-01-05 16:25:08 +00:00
// arbiter between IEU and hptw
// multiplex the outputs to LSU
mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLSURWM);
mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M);
mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M);
mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM);
mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE);
mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLSUPAdrM);
2022-01-05 16:25:08 +00:00
// always block interrupts when using the hardware page table walker.
assign CPUBusy = StallW & ~SelHPTW;
// It is not possible to pipeline hptw as the following load will depend on the previous load's
// data. Therefore we don't need a pipeline register
//flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle
// Specify which type of page fault is occurring
assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLSURWM[1];
assign DTLBStorePageFaultM = DTLBPageFaultM & PreLSURWM[0];
2022-01-05 16:25:08 +00:00
// When replaying CPU memory request after PTW select the IEUAdrM for correct address.
assign LSUAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLSUAdrE;
2022-01-05 16:25:08 +00:00
end // if (`MEM_VIRTMEM)
else begin
assign InterlockStall = 1'b0;
assign LSUAdrE = PreLSUAdrE;
2022-01-05 16:25:08 +00:00
assign SelHPTW = 1'b0;
assign IgnoreRequest = 1'b0;
assign PTE = '0;
assign PageType = '0;
assign DTLBWriteM = 1'b0;
assign ITLBWriteF = 1'b0;
assign PreLSURWM = MemRWM;
assign LSUFunct3M = Funct3M;
assign LSUFunct7M = Funct7M;
assign LSUAtomicM = AtomicM;
assign PreLSUAdrE = IEUAdrE[11:0];
assign PreLSUPAdrM = IEUAdrExtM;
2022-01-05 16:25:08 +00:00
assign CPUBusy = StallW;
assign DTLBLoadPageFaultM = 1'b0;
assign DTLBStorePageFaultM = 1'b0;
end
2022-01-14 23:02:28 +00:00
// **** look into this confusing signal.
// This signal is confusing. CommittedM tells the CPU's trap unit the current instruction
// in the memory stage is a memory operaton and that memory operation is either completed
// or is partially executed. This signal is only low for the first cycle of a memory
// operation.
// **** I think there is also a bug here. Data cache misses and TLB misses both
// set this bit in the first cycle. It is not strickly wrong, but it may be better
// to flush the memory operation at that time.
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM;
2022-01-14 23:02:28 +00:00
// Outside Pipeline Logic
// MMU and Misalignment fault logic required if privileged unit exists
2022-01-05 16:25:08 +00:00
if(`ZICSR_SUPPORTED == 1) begin : dmmu
logic DataMisalignedM;
mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0))
dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
.PrivilegeModeW, .DisableTranslation(SelHPTW),
.PAdr(PreLSUPAdrM),
2022-01-05 16:25:08 +00:00
.VAdr(IEUAdrM),
.Size(LSUFunct3M[1:0]),
2022-01-05 16:25:08 +00:00
.PTE,
.PageTypeWriteVal(PageType),
.TLBWrite(DTLBWriteM),
.TLBFlush(DTLBFlushM),
.PhysicalAddress(LSUPAdrM),
2022-01-05 16:25:08 +00:00
.TLBMiss(DTLBMissM),
.Cacheable(CacheableM),
.Idempotent(), .AtomicAllowed(),
.TLBPageFault(DTLBPageFaultM),
.InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM,
.AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug
.WriteAccessM(PreLSURWM[0]), .ReadAccessM(PreLSURWM[1]),
2022-01-05 16:25:08 +00:00
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW
); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist?
2022-01-14 23:02:28 +00:00
// *** lump into lsumislaigned module
2022-01-05 16:25:08 +00:00
// Determine if an Unaligned access is taking place
// hptw guarantees alignment, only check inputs from IEU.
always_comb
case(Funct3M[1:0])
2'b00: DataMisalignedM = 0; // lb, sb, lbu
2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu
2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu
2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd
endcase
// If the CPU's (not HPTW's) request is a page fault.
assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1];
assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0];
end else begin
assign LSUPAdrM = PreLSUPAdrM;
2022-01-05 16:25:08 +00:00
assign DTLBMissM = 0;
assign CacheableM = 1;
assign DTLBPageFaultM = 0;
assign LoadAccessFaultM = 0;
assign StoreAccessFaultM = 0;
assign LoadMisalignedFaultM = 0;
assign StoreMisalignedFaultM = 0;
end
assign LSUStall = DCacheStall | InterlockStall | BusStall;
2021-12-28 19:59:07 +00:00
// use PreLSU as prefix for lrsc
2022-01-05 16:25:08 +00:00
if (`A_SUPPORTED) begin:lrsc
assign MemReadM = PreLSURWM[1] & ~(IgnoreRequest) & ~DTLBMissM;
lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLSURWM, .LSUAtomicM, .LSUPAdrM,
.SquashSCW, .LSURWM);
2022-01-05 16:25:08 +00:00
end else begin:lrsc
assign SquashSCW = 0;
assign LSURWM = PreLSURWM;
2022-01-05 16:25:08 +00:00
end
2021-07-18 01:11:41 +00:00
// conditional
// 1. ram // controlled by `MEM_DTIM
// 2. cache `MEM_DCACHE
// 3. wire pass-through
2022-01-05 04:08:18 +00:00
localparam integer WORDSPERLINE = `MEM_DCACHE ? `DCACHE_LINELENINBITS/`XLEN : 1;
localparam integer LOGWPL = `MEM_DCACHE ? $clog2(WORDSPERLINE) : 1;
2022-01-05 04:08:18 +00:00
localparam integer LINELEN = `MEM_DCACHE ? `DCACHE_LINELENINBITS : `XLEN;
localparam integer WordCountThreshold = `MEM_DCACHE ? WORDSPERLINE - 1 : 0;
2022-01-05 04:08:18 +00:00
localparam integer LINEBYTELEN = LINELEN/8;
localparam integer OFFSETLEN = $clog2(LINEBYTELEN);
// temp
logic [`XLEN-1:0] FinalAMOWriteDataM, FinalWriteDataM;
(* mark_debug = "true" *) logic [`XLEN-1:0] PreLSUBusHWDATA;
logic [`XLEN-1:0] ReadDataWordM;
2022-01-05 04:08:18 +00:00
logic [LINELEN-1:0] DCacheMemWriteData;
// keep
logic [`XLEN-1:0] ReadDataWordMuxM;
logic [`PA_BITS-1:0] DCacheBusAdr;
2022-01-05 04:08:18 +00:00
logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0];
logic DCacheWriteLine;
logic DCacheFetchLine;
logic DCacheBusAck;
logic SelUncachedAdr;
2022-01-05 16:25:08 +00:00
if(`MEM_DCACHE) begin : dcache
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .DCACHE(1))
dcache(.clk, .reset, .CPUBusy,
.RW(CacheableM ? LSURWM : 2'b00), .FlushCache(FlushDCacheM), .Atomic(CacheableM ? LSUAtomicM : 2'b00),
.NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.FinalWriteData(FinalWriteDataM), .ReadDataWord(ReadDataWordM), .CacheStall(DCacheStall),
.CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequest, .CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataLineSets(ReadDataLineSetsM), .CacheMemWriteData(DCacheMemWriteData),
.CacheFetchLine(DCacheFetchLine), .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0));
2022-01-05 16:25:08 +00:00
end else begin : passthrough
if(!`MEM_DTIM) assign ReadDataWordM = 0;
2022-01-05 16:25:08 +00:00
assign DCacheStall = 0;
assign DCacheMiss = CacheableM;
2022-01-05 16:25:08 +00:00
assign DCacheAccess = CacheableM;
assign DCacheCommittedM = 0;
assign DCacheWriteLine = 0;
assign DCacheFetchLine = 0;
assign DCacheBusAdr = 0;
assign ReadDataLineSetsM[0] = 0;
end
// select between dcache and direct from the BUS. Always selected if no dcache.
mux2 #(`XLEN) UnCachedDataMux(.d0(ReadDataWordM),
.d1(DCacheMemWriteData[`XLEN-1:0]),
.s(SelUncachedAdr),
.y(ReadDataWordMuxM));
// sub word selection for read and writes and optional amo alu.
// finally swr
subwordread subwordread(.ReadDataWordMuxM,
.LSUPAdrM(LSUPAdrM[2:0]),
.Funct3M(LSUFunct3M),
.ReadDataM);
2022-01-05 16:25:08 +00:00
if (`A_SUPPORTED) begin : amo
logic [`XLEN-1:0] AMOResult;
amoalu amoalu(.srca(ReadDataM), .srcb(WriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]),
2022-01-05 16:25:08 +00:00
.result(AMOResult));
mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, LSUAtomicM[1], FinalAMOWriteDataM);
2022-01-05 16:25:08 +00:00
end else
assign FinalAMOWriteDataM = WriteDataM;
// this might only get instantiated if there is a dcache or dtim.
// There is a copy in the ebu.
subwordwrite subwordwrite(.HRDATA(ReadDataWordM),
.HADDRD(LSUPAdrM[2:0]),
.HSIZED({LSUFunct3M[2], 1'b0, LSUFunct3M[1:0]}),
.HWDATAIN(FinalAMOWriteDataM),
.HWDATA(FinalWriteDataM));
if (`MEM_DTIM == 1) begin : dtim
simpleram #(
.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram (
.HCLK(clk), .HRESETn(~reset),
.HSELRam(1'b1), .HADDR(LSUPAdrM[31:0]),
.HWRITE(LSURWM[0]), .HREADY(1'b1),
.HTRANS(|LSURWM ? 2'b10 : 2'b00), .HWDATA(FinalWriteDataM), .HREADRam(ReadDataWordM),
.HRESPRam(), .HREADYRam());
// since we have a local memory the bus connections are all disabled.
// There are no peripherals supported.
assign BusStall = 0;
assign LSUBusWrite = 0;
assign LSUBusRead = 0;
assign DCacheBusAck = 0;
assign BusCommittedM = 0;
assign SelUncachedAdr = 0;
end else begin : bus
// Bus Side logic
// register the fetch data from the next level of memory.
// This register should be necessary for timing. There is no register in the uncore or
// ahblite controller between the memories and this cache.
logic [LOGWPL-1:0] WordCount;
genvar index;
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
flopen #(`XLEN) fb(.clk,
.en(LSUBusAck & LSUBusRead & (index == WordCount)),
.d(LSUBusHRDATA),
.q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN]));
end
assign LocalLSUBusAdr = SelUncachedAdr ? LSUPAdrM : DCacheBusAdr ;
assign LSUBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLSUBusAdr;
assign PreLSUBusHWDATA = ReadDataLineSetsM[WordCount];
// exclude the subword write for uncached. We don't read the data first so we cannot
// select the subword by masking. Subword write also exists inside the uncore to
// suport subword masking for i/o. I'm not sure if this is necessary.
assign LSUBusHWDATA = SelUncachedAdr ? FinalAMOWriteDataM : PreLSUBusHWDATA;
if (`XLEN == 32) assign LSUBusSize = SelUncachedAdr ? LSUFunct3M : 3'b010;
else assign LSUBusSize = SelUncachedAdr ? LSUFunct3M : 3'b011;
busfsm #(WordCountThreshold, LOGWPL, `MEM_DCACHE)
busfsm(.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
.LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusRead,
.DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount);
end
2021-12-28 22:48:08 +00:00
endmodule