From 318ceba34d14a44ef654cd7d0a5ee8ea5982d373 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 11 Jan 2023 18:52:46 -0600 Subject: [PATCH] Improved LSU formating. --- pipelined/src/ieu/alu.sv | 2 + pipelined/src/lsu/lsu.sv | 272 +++++++++++++++++++++------------------ 2 files changed, 146 insertions(+), 128 deletions(-) diff --git a/pipelined/src/ieu/alu.sv b/pipelined/src/ieu/alu.sv index b75a2e73..f562e02f 100644 --- a/pipelined/src/ieu/alu.sv +++ b/pipelined/src/ieu/alu.sv @@ -6,6 +6,8 @@ // Modified: // // Purpose: RISC-V Arithmetic/Logic Unit +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.4) // // A component of the CORE-V-WALLY configurable RISC-V project. // diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 4d083d15..3f7c6641 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -6,9 +6,11 @@ // Modified: 11 January 2023 // // Purpose: Load/Store Unit -// Top level of the memory-stage core logic -// Contains data cache, DTLB, subword read/write datapath, interface to external bus -// +// HPTW, DMMU, data cache, interface to external bus +// Atomic, Endian swap, and subword read/write logic +// +// Documentation: RISC-V System on Chip Design Chapter 9 (Figure 9.2) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -27,100 +29,123 @@ // and limitations under the License. ///////////////////////////////////////////////////////////////////////////////////////////////////////// -// committed means the memory operation in flight cannot be interrupted. -// chap 5 handling faults to memory by delaying writes to memory stage. -// chap 6 combing bus with dtim -// chap 9 complete lsu. - `include "wally-config.vh" module lsu ( - input logic clk, reset, - input logic StallM, FlushM, StallW, FlushW, - output logic LSUStallM, + input logic clk, reset, + input logic StallM, FlushM, StallW, FlushW, + output logic LSUStallM, // LSU stalls pipeline during a multicycle operation. // connected to cpu (controls) - input logic [1:0] MemRWM, - input logic [2:0] Funct3M, - input logic [6:0] Funct7M, - input logic [1:0] AtomicM, - input logic FlushDCacheM, - output logic CommittedM, - output logic SquashSCW, - output logic DCacheMiss, - output logic DCacheAccess, + input logic [1:0] MemRWM, // Read/Write control + input logic [2:0] Funct3M, // Size of memory operation + input logic [6:0] Funct7M, // Atomic memory operation function + input logic [1:0] AtomicM, // Atomic memory operation + input logic FlushDCacheM, // Flush D cache to next level of memory + output logic CommittedM, // Delay interrupts while memory operation in flight + output logic SquashSCW, // Store conditional failed disable write to GPR + output logic DCacheMiss, // D cache miss for performance counters + output logic DCacheAccess, // D cache memory access for performance counters // address and write data - input logic [`XLEN-1:0] IEUAdrE, - (* mark_debug = "true" *)output logic [`XLEN-1:0] IEUAdrM, - (* mark_debug = "true" *)input logic [`XLEN-1:0] WriteDataM, - output logic [`LLEN-1:0] ReadDataW, + input logic [`XLEN-1:0] IEUAdrE, // Execution stage memory address + (* mark_debug = "true" *) output logic [`XLEN-1:0] IEUAdrM, // Memory stage memory address + (* mark_debug = "true" *) input logic [`XLEN-1:0] WriteDataM, // Write data from IEU + output logic [`LLEN-1:0] ReadDataW, // Read data to IEU or FPU // cpu privilege - input logic [1:0] PrivilegeModeW, - input logic BigEndianM, - input logic sfencevmaM, + input logic [1:0] PrivilegeModeW, // Current privilege mode + input logic BigEndianM, // Swap byte order to big endian + input logic sfencevmaM, // Virtual memory address fence // fpu - input logic [`FLEN-1:0] FWriteDataM, - input logic FpLoadStoreM, + input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU + input logic FpLoadStoreM, // Selects FPU as store for write data // faults - output logic LoadPageFaultM, StoreAmoPageFaultM, - output logic LoadMisalignedFaultM, LoadAccessFaultM, HPTWInstrAccessFaultM, + output logic LoadPageFaultM, StoreAmoPageFaultM, // Page fault exceptions + output logic LoadMisalignedFaultM, // Load address misaligned fault + output logic LoadAccessFaultM, // Load access fault (PMA) + output logic HPTWInstrAccessFaultM, // HPTW generated access fault during instruction fetch // cpu hazard unit (trap) - output logic StoreAmoMisalignedFaultM, StoreAmoAccessFaultM, + output logic StoreAmoMisalignedFaultM, // Store or AMO address misaligned fault + output logic StoreAmoAccessFaultM, // Store or AMO access fault // connect to ahb - (* mark_debug = "true" *) output logic [`PA_BITS-1:0] LSUHADDR, - (* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, - (* mark_debug = "true" *) output logic [`XLEN-1:0] LSUHWDATA, - (* mark_debug = "true" *) input logic LSUHREADY, - (* mark_debug = "true" *) output logic LSUHWRITE, - (* mark_debug = "true" *) output logic [2:0] LSUHSIZE, - (* mark_debug = "true" *) output logic [2:0] LSUHBURST, - (* mark_debug = "true" *) output logic [1:0] LSUHTRANS, - (* mark_debug = "true" *) output logic [`XLEN/8-1:0] LSUHWSTRB, + (* mark_debug = "true" *) output logic [`PA_BITS-1:0] LSUHADDR, // Bus address from LSU to EBU + (* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, // Bus read data from LSU to EBU + (* mark_debug = "true" *) output logic [`XLEN-1:0] LSUHWDATA, // Bus write data from LSU to EBU + (* mark_debug = "true" *) input logic LSUHREADY, // Bus ready from LSU to EBU + (* mark_debug = "true" *) output logic LSUHWRITE, // Bus write operation from LSU to EBU + (* mark_debug = "true" *) output logic [2:0] LSUHSIZE, // Bus operation size from LSU to EBU + (* mark_debug = "true" *) output logic [2:0] LSUHBURST, // Bus burst from LSU to EBU + (* mark_debug = "true" *) output logic [1:0] LSUHTRANS, // Bus transaction type from LSU to EBU + (* mark_debug = "true" *) output logic [`XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU // page table walker - input logic [`XLEN-1:0] SATP_REGW, // from csr - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic [`XLEN-1:0] PCF, - input logic ITLBMissF, - input logic InstrDAPageFaultF, - output logic [`XLEN-1:0] PTE, - output logic [1:0] PageType, - output logic ITLBWriteF, SelHPTW, - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // *** this one especially has a large note attached to it in pmpchecker. + input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege + input logic [1:0] STATUS_MPP, // Machine previous privilege mode + input logic [`XLEN-1:0] PCF, // Fetch PC + input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk + input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB + output logic [1:0] PageType, // Type of page table entry to write to ITLB + output logic ITLBWriteF, // Write PTE to ITLB + output logic SelHPTW, // During a HPTW walk the effective privilege mode becomes S_MODE + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP address from privileged unit ); - logic [`XLEN+1:0] IEUAdrExtM; - logic [`XLEN+1:0] IEUAdrExtE; - logic [`PA_BITS-1:0] PAdrM; - logic DTLBMissM; - logic DTLBWriteM; - logic [1:0] PreLSURWM, LSURWM; - logic [2:0] LSUFunct3M; - logic [6:0] LSUFunct7M; - logic [1:0] LSUAtomicM; - (* mark_debug = "true" *) logic [`XLEN+1:0] IHAdrM; - logic GatedStallW; - logic DCacheStallW; - logic CacheableM; - logic BusStall; - logic HPTWStall; - logic IgnoreRequestTLB; - logic BusCommittedM, DCacheCommittedM; - logic DataDAPageFaultM; - logic [`XLEN-1:0] IHWriteDataM, IMAWriteDataM; - logic [`LLEN-1:0] IMAFWriteDataM; - logic [`LLEN-1:0] ReadDataM; - logic [(`LLEN-1)/8:0] ByteMaskM; - logic SelDTIM; - logic LSULoadAccessFaultM, LSUStoreAmoAccessFaultM; - + logic [`XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer + logic [`XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer + logic [`PA_BITS-1:0] PAdrM; // Physical memory address + (* mark_debug = "true" *) logic [`XLEN+1:0] IHAdrM; // Either IEU or HPTW memory address + + logic [1:0] PreLSURWM; // IEU or HPTW Read/Write signal + logic [1:0] LSURWM; // IEU or HPTW Read/Write signal gated by LR/SC + logic [2:0] LSUFunct3M; // IEU or HPTW memory operation size + logic [6:0] LSUFunct7M; // AMO function gated by HPTW + logic [1:0] LSUAtomicM; // AMO signal gated by HPTW + + logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1 + + logic DCacheStallW; // D$ busy with multicycle operation + logic BusStall; // Bus interface busy with multicycle operation + logic HPTWStall; // HPTW busy with multicycle operation + + logic CacheableM; // PMA indicates memory address is cacheable + logic BusCommittedM; // Bus memory operation in flight, delay interrupts + logic DCacheCommittedM; // D$ memory operation started, delay interrupts + + logic [`LLEN-1:0] DTIMReadDataWordM; // DTIM read data + logic [`LLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [`LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data + logic [`LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data + logic [`LLEN-1:0] ReadDataWordM; // Read data before subword selection + logic [`LLEN-1:0] ReadDataM; // Final read data + + logic [`XLEN-1:0] IHWriteDataM; // IEU or HPTW write data + logic [`XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data + logic [`LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data + logic [`LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data + logic [`LLEN-1:0] LSUWriteDataM; // Final write data + logic [(`LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write + + logic DTLBMissM; // DTLB miss causes HPTW walk + logic DTLBWriteM; // Writes PTE to DTLB + logic DataDAPageFaultM; // DTLB hit needs to update dirty or access bits + logic LSULoadAccessFaultM; // Load acces fault + logic LSUStoreAmoAccessFaultM; // Store access fault + logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle + logic IgnoreRequest; // On FlushM, ignore TLB miss + logic SelDTIM; // Select DTIM rather than bus or D$ + + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Pipeline for IEUAdr E to M + // Zero-extend address to 34 bits for XLEN=32 + ///////////////////////////////////////////////////////////////////////////////////////////// + flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtE = {2'b00, IEUAdrE}; - assign LSUStallM = DCacheStallW | HPTWStall | BusStall; ///////////////////////////////////////////////////////////////////////////////////////////// - // HPTW(only needed if VM supported) + // HPTW (only needed if VM supported) // MMU include PMP and is needed if any privileged supported ///////////////////////////////////////////////////////////////////////////////////////////// @@ -129,81 +154,72 @@ module lsu ( .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, .FlushW, .DCacheStallW, .SATP_REGW, .PCF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, - .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, + .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN + .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, .IEUAdrExtM, .PTE, .IHWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IHAdrM, .HPTWStall, .SelHPTW, .IgnoreRequestTLB, .LSULoadAccessFaultM, .LSUStoreAmoAccessFaultM, .LoadAccessFaultM, .StoreAmoAccessFaultM, .HPTWInstrAccessFaultM); - end else begin - assign {HPTWStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; + end else begin // No HPTW, so signals are not multiplexed assign PreLSURWM = MemRWM; assign IHAdrM = IEUAdrExtM; - assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; + assign LSUFunct3M = Funct3M; + assign LSUFunct7M = Funct7M; + assign LSUAtomicM = AtomicM; assign IHWriteDataM = WriteDataM; assign LoadAccessFaultM = LSULoadAccessFaultM; assign StoreAmoAccessFaultM = LSUStoreAmoAccessFaultM; + assign {HPTWStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; assign HPTWInstrAccessFaultM = '0; end - // CommittedM tells the CPU's privilege unit the current instruction - // in the memory stage is a memory operaton and that memory operation is either completed - // or is partially executed. Partially completed memory operations need to prevent an interrupts. - // There is not a clean way to restore back to a partial executed instruction. CommiteedM will - // delay the interrupt until the LSU is in a clean state. + // CommittedM indicates the cache, bus, or HPTW are busy with a multiple cycle operation. + // CommittedM is 1 after the first cycle and until the last cycle. Partially completed memory + // operations delay interrupts until the next instruction by suppressing pending interrupts in + // the trap module. assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; + assign LSUStallM = DCacheStallW | HPTWStall | BusStall; - // MMU and Misalignment fault logic required if privileged unit exists + ///////////////////////////////////////////////////////////////////////////////////////////// + // MMU and misalignment fault logic required if privileged unit exists + ///////////////////////////////////////////////////////////////////////////////////////////// if(`ZICSR_SUPPORTED == 1) begin : dmmu - logic DisableTranslation; + logic DisableTranslation; // During HPTW walk or D$ flush disable virtual memory address translation assign DisableTranslation = SelHPTW | FlushDCacheM; mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PrivilegeModeW, .DisableTranslation, - .VAdr(IHAdrM), - .Size(LSUFunct3M[1:0]), - .PTE, - .PageTypeWriteVal(PageType), - .TLBWrite(DTLBWriteM), - .TLBFlush(sfencevmaM), - .PhysicalAddress(PAdrM), - .TLBMiss(DTLBMissM), - .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM), - .InstrAccessFaultF(), .LoadAccessFaultM(LSULoadAccessFaultM), .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), - .InstrPageFaultF(),.LoadPageFaultM, .StoreAmoPageFaultM, + .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), + .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(sfencevmaM), + .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM), + .InstrAccessFaultF(), .LoadAccessFaultM(LSULoadAccessFaultM), + .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM, + .StoreAmoPageFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. .DAPageFault(DataDAPageFaultM), - // *** should use LSURWM as this is includes the lr/sc squash. However this introduces a combo loop - // from squash, depends on PAdrM, depends on TLBHit, depends on these *AccessM inputs. .AtomicAccessM(|LSUAtomicM), .ExecuteAccessF(1'b0), .WriteAccessM(PreLSURWM[0]), .ReadAccessM(PreLSURWM[1]), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); - end else begin - // Determine which region of physical memory (if any) is being accessed - - // conditionally move adredecs to here and ifu. - // the lsu will output LSUHSel to EBU (need the same for ifu). - // The ebu will have a mux to select between LSUHSel, IFUHSel - // mux for HWSTRB - // adrdecs out of uncore. - + end else begin // No MMU, so no PMA/page faults and no address translation assign {DTLBMissM, LSULoadAccessFaultM, LSUStoreAmoAccessFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM} = '0; assign {LoadPageFaultM, StoreAmoPageFaultM} = '0; assign PAdrM = IHAdrM[`PA_BITS-1:0]; - assign CacheableM = '1; - assign SelDTIM = `DTIM_SUPPORTED & ~`BUS; // if no pma then select dtim if there is a DTIM. If there is + assign CacheableM = 1'b1; + assign SelDTIM = `DTIM_SUPPORTED & ~`BUS; // if no PMA then select dtim if there is a DTIM. If there is // a bus then this is always 0. Cannot have both without PMA. end ///////////////////////////////////////////////////////////////////////////////////////////// - // Memory System - // Either Data Cache or Data Tightly Integrated Memory or just bus interface + // Memory System (options) + // 1. DTIM + // 2. DTIM and bus + // 3. Bus + // 4. Cache and bus ///////////////////////////////////////////////////////////////////////////////////////////// - logic [`LLEN-1:0] LSUWriteDataM, LittleEndianWriteDataM; - logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; - logic [`LLEN-1:0] ReadDataWordMuxM, DTIMReadDataWordM, DCacheReadDataWordM; - logic IgnoreRequest; + + // Pause IEU memory request if TLB miss. After TLB fill, replay request. + // Discard memory request on pipeline flush assign IgnoreRequest = IgnoreRequestTLB | FlushW; if (`DTIM_SUPPORTED) begin : dtim @@ -211,7 +227,7 @@ module lsu ( logic [1:0] DTIMMemRWM; // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. - assign DTIMAdr = MemRWM[0] ? IEUAdrExtM[`PA_BITS-1:0] : IEUAdrExtE[`PA_BITS-1:0]; // zero extend or contract to PA_BITS + mux2 #(`PA_BITS) DTIMAdrMux(IEUAdrExtE[`PA_BITS-1:0], IEUAdrExtM[`PA_BITS-1:0], MemRWM[0], DTIMAdr); assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. @@ -221,13 +237,13 @@ module lsu ( end else begin end if (`BUS) begin : bus - localparam integer LLENWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`LLEN : 1; - localparam integer LLENLOGBWPL = `DCACHE ? $clog2(LLENWORDSPERLINE) : 1; - localparam integer BEATSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`AHBW : 1; - localparam integer AHBWLOGBWPL = `DCACHE ? $clog2(BEATSPERLINE) : 1; + localparam integer LLENWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`LLEN : 1; // Number of LLEN words in cacheline + localparam integer LLENLOGBWPL = `DCACHE ? $clog2(LLENWORDSPERLINE) : 1; // Log2 of ^ + localparam integer BEATSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`AHBW : 1; // Number of ABHW words (beats) in cacheline + localparam integer AHBWLOGBWPL = `DCACHE ? $clog2(BEATSPERLINE) : 1; // Log2 of ^ if(`DCACHE) begin : dcache - localparam integer LINELEN = `DCACHE ? `DCACHE_LINELENINBITS : `XLEN; - logic [LINELEN-1:0] FetchBuffer; + localparam integer LINELEN = `DCACHE ? `DCACHE_LINELENINBITS : `XLEN; // Number of bytes in cacheline + logic [LINELEN-1:0] FetchBuffer; logic [`PA_BITS-1:0] DCacheBusAdr; logic [AHBWLOGBWPL-1:0] BeatCount; logic DCacheBusAck;