cvw/pipelined/src/lsu/lsu.sv

318 lines
15 KiB
Systemverilog
Raw Normal View History

///////////////////////////////////////////
// lsu.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Load/Store Unit
// Top level of the memory-stage core logic
// Contains data cache, DTLB, subword read/write datapath, interface to external bus
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
2022-01-15 01:19:44 +00:00
module lsu (
input logic clk, reset,
input logic StallM, FlushM, StallW, FlushW,
2022-01-15 00:24:16 +00:00
output logic LSUStallM,
// connected to cpu (controls)
input logic [1:0] MemRWM,
input logic [2:0] Funct3M,
input logic [6:0] Funct7M,
input logic [1:0] AtomicM,
input logic TrapM,
input logic FlushDCacheM,
output logic CommittedM,
output logic SquashSCW,
output logic DCacheMiss,
output logic DCacheAccess,
// address and write data
input logic [`XLEN-1:0] IEUAdrE,
(* mark_debug = "true" *)output logic [`XLEN-1:0] IEUAdrM,
input logic [`XLEN-1:0] WriteDataM,
output logic [`XLEN-1:0] ReadDataM,
// cpu privilege
input logic [1:0] PrivilegeModeW,
input logic DTLBFlushM,
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
// cpu hazard unit (trap)
output logic StoreAmoMisalignedFaultM, StoreAmoAccessFaultM,
// connect to ahb
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] LSUBusAdr,
(* mark_debug = "true" *) output logic LSUBusRead,
(* mark_debug = "true" *) output logic LSUBusWrite,
(* mark_debug = "true" *) input logic LSUBusAck,
(* mark_debug = "true" *) input logic [`XLEN-1:0] LSUBusHRDATA,
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUBusHWDATA,
(* mark_debug = "true" *) output logic [2:0] LSUBusSize,
// page table walker
input logic [`XLEN-1:0] SATP_REGW, // from csr
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
input logic [1:0] STATUS_MPP,
input logic [`XLEN-1:0] PCF,
input logic ITLBMissF,
output logic [`XLEN-1:0] PTE,
output logic [1:0] PageType,
output logic ITLBWriteF,
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // *** this one especially has a large note attached to it in pmpchecker.
2022-01-15 01:19:44 +00:00
);
logic [`PA_BITS-1:0] LSUPAdrM; // from mmu to dcache
logic [`XLEN+1:0] IEUAdrExtM;
logic DTLBMissM;
logic DTLBWriteM;
logic [1:0] LSURWM;
logic [1:0] PreLSURWM;
logic [2:0] LSUFunct3M;
logic [6:0] LSUFunct7M;
logic [1:0] LSUAtomicM;
(* mark_debug = "true" *) logic [`PA_BITS-1:0] PreLSUPAdrM, LocalLSUBusAdr;
logic [11:0] PreLSUAdrE, LSUAdrE;
2021-12-20 04:21:03 +00:00
logic CPUBusy;
logic MemReadM;
2022-01-15 00:39:07 +00:00
logic DCacheStallM;
logic CacheableM;
2021-12-20 04:24:07 +00:00
logic SelHPTW;
logic BusStall;
logic InterlockStall;
logic IgnoreRequest;
logic BusCommittedM, DCacheCommittedM;
2022-01-14 23:55:27 +00:00
////////////////////////////////////////////////////////////////////////////////////////////////
2022-01-14 23:02:28 +00:00
// HPTW and Interlock FSM (only needed if VM supported)
2022-01-14 23:55:27 +00:00
// MMU include PMP and is needed if any privileged supported
////////////////////////////////////////////////////////////////////////////////////////////////
2022-01-15 00:24:16 +00:00
flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM);
assign IEUAdrExtM = {2'b00, IEUAdrM};
2022-01-05 16:25:08 +00:00
if(`MEM_VIRTMEM) begin : MEM_VIRTMEM
// *** encapsulate as lsuvirtmem
2022-01-05 16:25:08 +00:00
logic AnyCPUReqM;
logic [`PA_BITS-1:0] HPTWAdr;
logic HPTWRead;
logic [2:0] HPTWSize;
logic SelReplayCPURequest;
assign AnyCPUReqM = (|MemRWM) | (|AtomicM);
interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF,
2022-01-15 00:39:07 +00:00
.DTLBMissM, .DTLBWriteM, .TrapM, .DCacheStallM,
2022-01-05 16:25:08 +00:00
.InterlockStall, .SelReplayCPURequest, .SelHPTW,
.IgnoreRequest);
hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM,
.ITLBMissF(ITLBMissF & ~TrapM),
.DTLBMissM(DTLBMissM & ~TrapM),
2022-01-13 17:04:48 +00:00
.PTE, .PageType, .ITLBWriteF, .DTLBWriteM,
2022-01-05 16:25:08 +00:00
.HPTWReadPTE(ReadDataM),
2022-01-15 00:39:07 +00:00
.DCacheStallM, .HPTWAdr, .HPTWRead, .HPTWSize);
2022-01-05 16:25:08 +00:00
// arbiter between IEU and hptw
// multiplex the outputs to LSU
mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLSURWM);
mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M);
mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M);
mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM);
mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE);
2022-01-27 22:03:00 +00:00
mux2 #(12) replaymux(PreLSUAdrE, IEUAdrM[11:0], SelReplayCPURequest, LSUAdrE); // replay cpu request after hptw.
mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLSUPAdrM);
2022-01-05 16:25:08 +00:00
// always block interrupts when using the hardware page table walker.
assign CPUBusy = StallW & ~SelHPTW;
end // if (`MEM_VIRTMEM)
else begin
assign {InterlockStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF} = '0;
assign IgnoreRequest = TrapM;
2022-01-05 16:25:08 +00:00
assign CPUBusy = StallW;
2022-01-15 00:24:16 +00:00
assign LSUAdrE = PreLSUAdrE; assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM;
assign PreLSURWM = MemRWM; assign PreLSUAdrE = IEUAdrE[11:0]; assign PreLSUPAdrM = IEUAdrExtM;
end
2022-01-14 23:02:28 +00:00
// **** look into this confusing signal.
// This signal is confusing. CommittedM tells the CPU's trap unit the current instruction
// in the memory stage is a memory operaton and that memory operation is either completed
// or is partially executed. This signal is only low for the first cycle of a memory
// operation.
// **** I think there is also a bug here. Data cache misses and TLB misses both
// set this bit in the first cycle. It is not strickly wrong, but it may be better
// to flush the memory operation at that time.
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM;
2022-01-14 23:02:28 +00:00
// MMU and Misalignment fault logic required if privileged unit exists
2022-01-05 16:25:08 +00:00
if(`ZICSR_SUPPORTED == 1) begin : dmmu
mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0))
dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
.PrivilegeModeW, .DisableTranslation(SelHPTW),
.PAdr(PreLSUPAdrM),
2022-01-05 16:25:08 +00:00
.VAdr(IEUAdrM),
.Size(LSUFunct3M[1:0]),
2022-01-05 16:25:08 +00:00
.PTE,
.PageTypeWriteVal(PageType),
.TLBWrite(DTLBWriteM),
.TLBFlush(DTLBFlushM),
.PhysicalAddress(LSUPAdrM),
2022-01-05 16:25:08 +00:00
.TLBMiss(DTLBMissM),
.Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(),
.InstrAccessFaultF(), .LoadAccessFaultM, .StoreAmoAccessFaultM,
.InstrPageFaultF(),.LoadPageFaultM, .StoreAmoPageFaultM,
.LoadMisalignedFaultM, .StoreAmoMisalignedFaultM,
.AtomicAccessM(|LSUAtomicM), .ExecuteAccessF(1'b0), // **** change this to just use PreLSURWM
.WriteAccessM(PreLSURWM[0]), .ReadAccessM(PreLSURWM[1]),
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW);
2022-01-05 16:25:08 +00:00
end else begin
assign {DTLBMissM, LoadAccessFaultM, StoreAmoAccessFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM} = '0;
assign {LoadPageFaultM, StoreAmoPageFaultM} = '0;
assign LSUPAdrM = PreLSUPAdrM;
assign CacheableM = '1;
2022-01-05 16:25:08 +00:00
end
2022-01-15 00:39:07 +00:00
assign LSUStallM = DCacheStallM | InterlockStall | BusStall;
2022-01-14 23:55:27 +00:00
////////////////////////////////////////////////////////////////////////////////////////////////
// Hart Memory System
// Either Data Cache or Data Tightly Integrated Memory or just bus interface
////////////////////////////////////////////////////////////////////////////////////////////////
2021-07-18 01:11:41 +00:00
2022-01-05 04:08:18 +00:00
localparam integer WORDSPERLINE = `MEM_DCACHE ? `DCACHE_LINELENINBITS/`XLEN : 1;
localparam integer LOGWPL = `MEM_DCACHE ? $clog2(WORDSPERLINE) : 1;
2022-01-05 04:08:18 +00:00
localparam integer LINELEN = `MEM_DCACHE ? `DCACHE_LINELENINBITS : `XLEN;
localparam integer WordCountThreshold = `MEM_DCACHE ? WORDSPERLINE - 1 : 0;
2022-01-05 04:08:18 +00:00
localparam integer LINEBYTELEN = LINELEN/8;
localparam integer OFFSETLEN = $clog2(LINEBYTELEN);
logic [`XLEN-1:0] FinalAMOWriteDataM, FinalWriteDataM;
(* mark_debug = "true" *) logic [`XLEN-1:0] PreLSUBusHWDATA;
logic [`XLEN-1:0] ReadDataWordM;
2022-01-05 04:08:18 +00:00
logic [LINELEN-1:0] DCacheMemWriteData;
logic [`XLEN-1:0] ReadDataWordMuxM;
logic [`PA_BITS-1:0] DCacheBusAdr;
2022-01-05 04:08:18 +00:00
logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0];
logic DCacheWriteLine;
logic DCacheFetchLine;
logic DCacheBusAck;
logic SelUncachedAdr;
2022-01-14 23:55:27 +00:00
if (`MEM_DTIM) begin : dtim
/* Consider restructuring with higher level blocks. Try drawing block diagrams with several pages of schematics,
one for top level, one for each sublevel, alternate with either dtim or bus. If this looks more satisfactory,
restructure code accordingly.
dtim dtim (.clk, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, .ReadDataWordM,
.BusStallM, .LSUBusWrite, .LSUBusRead, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr,
.ReadDataWordMuxM, .DCacheStallM, .DCacheCommittedM, .DCacheWriteLine, .DCacheFetchLine, .DCacheBusAdr,
.ReadDataLineSetsM, .DCacheMiss, .DCacheAccess); */
2022-01-25 19:46:13 +00:00
// *** adjust interface so write address doesn't need delaying; switch to standard RAM?
2022-01-15 01:19:44 +00:00
simpleram #(.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram (
2022-01-25 17:34:15 +00:00
.clk,
.a(CPUBusy | LSURWM[0] ? IEUAdrM[31:0] : IEUAdrE[31:0]),
.we(LSURWM[0] & ~TrapM), // have to ignore write if Trap.
2022-01-25 18:26:31 +00:00
.wd(FinalWriteDataM), .rd(ReadDataWordM));
// since we have a local memory the bus connections are all disabled.
// There are no peripherals supported.
2022-01-14 23:55:27 +00:00
assign {BusStall, LSUBusWrite, LSUBusRead, DCacheBusAck, BusCommittedM, SelUncachedAdr} = '0;
2022-01-15 00:03:03 +00:00
assign ReadDataWordMuxM = ReadDataWordM;
2022-01-15 01:11:17 +00:00
assign {DCacheStallM, DCacheCommittedM, DCacheWriteLine, DCacheFetchLine, DCacheBusAdr} = '0;
assign ReadDataLineSetsM[0] = 0;
assign DCacheMiss = 1'b0; assign DCacheAccess = 1'b0;
end else begin : bus
// Bus Side logic
// register the fetch data from the next level of memory.
// This register should be necessary for timing. There is no register in the uncore or
// ahblite controller between the memories and this cache.
2022-01-15 00:39:07 +00:00
busdp #(WORDSPERLINE, LINELEN, LOGWPL, WordCountThreshold)
busdp(.clk, .reset,
.LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusHWDATA, .LSUBusSize,
.LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .ReadDataLineSetsM, .DCacheFetchLine,
.DCacheWriteLine, .DCacheBusAck, .DCacheMemWriteData, .LSUPAdrM, .FinalAMOWriteDataM,
.ReadDataWordM, .ReadDataWordMuxM, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM,
.BusStall, .BusCommittedM);
2022-01-15 00:39:07 +00:00
if(`MEM_DCACHE) begin : dcache
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .DCACHE(1))
dcache(.clk, .reset, .CPUBusy,
.RW(CacheableM ? LSURWM : 2'b00), .FlushCache(FlushDCacheM), .Atomic(CacheableM ? LSUAtomicM : 2'b00),
.NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.FinalWriteData(FinalWriteDataM), .ReadDataWord(ReadDataWordM), .CacheStall(DCacheStallM),
.CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequest, .CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataLineSets(ReadDataLineSetsM), .CacheMemWriteData(DCacheMemWriteData),
.CacheFetchLine(DCacheFetchLine), .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0));
end else begin : passthrough
assign {ReadDataWordM, DCacheStallM, DCacheCommittedM, DCacheWriteLine, DCacheFetchLine, DCacheBusAdr} = '0;
assign ReadDataLineSetsM[0] = 0;
assign DCacheMiss = CacheableM; assign DCacheAccess = CacheableM;
end
end
2022-01-14 23:55:27 +00:00
// sub word selection for read and writes and optional amo alu.
subwordread subwordread(.ReadDataWordMuxM,
.LSUPAdrM(LSUPAdrM[2:0]),
.Funct3M(LSUFunct3M),
.ReadDataM);
// this might only get instantiated if there is a dcache or dtim.
2022-01-15 00:39:07 +00:00
// There is a copy in the ebu. *** is it needed there, or can data come in from ebu, get muxed here and sent back out
// Explore changing feedback path from output of AMOALU to subword write ***
2022-01-14 23:55:27 +00:00
subwordwrite subwordwrite(.HRDATA(ReadDataWordM),
.HADDRD(LSUPAdrM[2:0]),
.HSIZED({LSUFunct3M[2], 1'b0, LSUFunct3M[1:0]}),
.HWDATAIN(FinalAMOWriteDataM),
.HWDATA(FinalWriteDataM));
////////////////////////////////////////////////////////////////////////////////////////////////
// Atomic operations
////////////////////////////////////////////////////////////////////////////////////////////////
if (`A_SUPPORTED) begin:lrsc
/*atomic atomic(.clk, .reset, .FlushW, .CPUBusy, .MemRead, .PreLSURWM, .LSUAtomicM, .LSUPAdrM,
.SquashSCM, .LSURWM, ... ); *** */
2022-01-14 23:55:27 +00:00
logic [`XLEN-1:0] AMOResult;
amoalu amoalu(.srca(ReadDataM), .srcb(WriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]),
.result(AMOResult));
mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, LSUAtomicM[1], FinalAMOWriteDataM);
assign MemReadM = PreLSURWM[1] & ~(IgnoreRequest) & ~DTLBMissM;
lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLSURWM, .LSUAtomicM, .LSUPAdrM,
.SquashSCW, .LSURWM);
end else begin:lrsc
2022-01-15 00:39:07 +00:00
assign SquashSCW = 0;
assign LSURWM = PreLSURWM;
2022-01-14 23:55:27 +00:00
assign FinalAMOWriteDataM = WriteDataM;
end
endmodule