From 0cc04f417a7dfc510ad157eabb850518da09ddde Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 27 Nov 2023 20:57:33 -0600 Subject: [PATCH 01/20] Added correct cbo fault behavior. --- src/mmu/pmachecker.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 1ccf6501c..a48ccd241 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -77,5 +77,5 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( assign PMAAccessFault = (SelRegions[0]) & AccessRWXZ | AtomicAccessM & ~AtomicAllowed; assign PMAInstrAccessFaultF = ExecuteAccessF & PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM & PMAAccessFault; - assign PMAStoreAmoAccessFaultM = WriteAccessM & PMAAccessFault; + assign PMAStoreAmoAccessFaultM = (WriteAccessM | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & CMOp[3])) & PMAAccessFault; endmodule From 2e4bafe3a6cdba265585ab4837c00802b9ad9c2c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 27 Nov 2023 21:24:30 -0600 Subject: [PATCH 02/20] Extended the ahbcacheinterface to zero a cacheline's worth of uncached memory on cbo.zero. 
--- src/ebu/ahbcacheinterface.sv | 9 ++++++--- src/ebu/buscachefsm.sv | 16 ++++++++++------ src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 8 +++++--- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 054022106..78b0d15e8 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -66,6 +66,7 @@ module ahbcacheinterface #( input logic [LLEN-1:0] WriteDataM, // IEU write data for uncached store input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write input logic [2:0] Funct3, // Size of uncached memory operation + input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache // lsu/ifu interface input logic Stall, // Core pipeline is stalled @@ -80,6 +81,7 @@ module ahbcacheinterface #( logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA logic [AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s logic [AHBW-1:0] PreHWDATA; // AHB Address phase write data + logic [PA_BITS-1:0] PAdrZero; genvar index; @@ -91,10 +93,11 @@ module ahbcacheinterface #( .q(FetchBuffer[(index+1)*AHBW-1:index*AHBW])); end - mux2 #(PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR); + assign PAdrZero = BusCMOZero ? {PAdr[PA_BITS-1:$clog2(LINELEN/8)], {$clog2(LINELEN/8){1'b0}}} : PAdr; + mux2 #(PA_BITS) localadrmux(PAdrZero, CacheBusAdr, Cacheable, LocalHADDR); assign HADDR = ({{PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(AHBW/8)) + LocalHADDR; - mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); + mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable | BusCMOZero), .y(HSIZE)); // When AHBW is less than LLEN need extra muxes to select the subword from cache's read data. 
logic [AHBW-1:0] CacheReadDataWordAHB; @@ -119,6 +122,6 @@ module ahbcacheinterface #( buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm( .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, - .CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed, + .CacheBusRW, .BusCMOZero, .CacheBusAck, .BeatCount, .BeatCountDelayed, .HREADY, .HTRANS, .HWRITE, .HBURST); endmodule diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 4d1d475d8..45f66762f 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -42,6 +42,7 @@ module buscachefsm #( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write + input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache output logic BusStall, // Bus is busy with an in flight memory operation output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt @@ -75,6 +76,9 @@ module buscachefsm #( logic BeatCntEn; logic BeatCntReset; logic CacheAccess; + logic BusWrite; + + assign BusWrite = CacheBusRW[0] | BusCMOZero; always_ff @(posedge HCLK) if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; @@ -83,18 +87,18 @@ module buscachefsm #( always_comb begin case(CurrState) ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; - else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; else NextState = ADR_PHASE; DATA_PHASE: if(HREADY) NextState = MEM3; else NextState = DATA_PHASE; MEM3: if(Stall) NextState = MEM3; else NextState = ADR_PHASE; - CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + CACHE_FETCH: if(HREADY & 
FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_FETCH; - CACHE_WRITEBACK: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + CACHE_WRITEBACK: if(HREADY & FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_WRITEBACK; @@ -128,7 +132,7 @@ module buscachefsm #( (CacheAccess & FinalBeatCount & |CacheBusRW & HREADY & ~Flush) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |BeatCount) ? (`BURST_EN ? AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; - assign HWRITE = (BusRW[0] | CacheBusRW[0] & ~Flush) | (CurrState == CACHE_WRITEBACK & |BeatCount); + assign HWRITE = (BusRW[0] | BusWrite & ~Flush) | (CurrState == CACHE_WRITEBACK & |BeatCount); assign HBURST = `BURST_EN & ((|CacheBusRW & ~Flush) | (CacheAccess & |BeatCount)) ? 
LocalBurstType : 3'b0; always_comb begin @@ -142,8 +146,8 @@ module buscachefsm #( end // communication to cache - assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount); - assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | CacheBusRW[0])) | + assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount & ~BusCMOZero); + assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | BusWrite)) | (CurrState == DATA_PHASE & BusRW[0]) | (CurrState == CACHE_WRITEBACK) | (CurrState == CACHE_FETCH); diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 7d61bd4db..4a02848b5 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -252,7 +252,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( ahbcacheinterface #(P.AHBW, P.LLEN, P.PA_BITS, WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1) ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), .HRDATA, - .Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), + .Flush(FlushD), .CacheBusRW, .BusCMOZero(1'b0), .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), .Funct3(3'b010), .HADDR(IFUHADDR), .HREADY(IFUHREADY), .HWRITE(IFUHWRITE), .CacheBusAdr(ICacheBusAdr), .BeatCount(), .Cacheable(CacheableF), .SelBusBeat(), .WriteDataM('0), .CacheBusAck(ICacheBusAck), .HWDATA(), .CacheableOrFlushCacheM(1'b0), .CacheReadDataWordM('0), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 2bb604d39..6fe4377fc 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -301,12 +301,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic FlushDCache; // Suppress d cache flush if there is an ITLB miss. logic CacheStall; logic [1:0] CacheBusRWTemp; + logic BusCMOZero; if(P.ZICBOZ_SUPPORTED) begin - assign BusRW = ~CacheableM & ~SelDTIM ? CMOpM[3] ? 2'b01 : LSURWM : '0; + assign BusCMOZero = CMOpM[3] & ~CacheableM; end else begin - assign BusRW = ~CacheableM & ~SelDTIM ? LSURWM : '0; + assign BusCMOZero = '0; end + assign BusRW = ~CacheableM & ~SelDTIM ? 
LSURWM : '0; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; assign CacheRWM = CacheableM & ~SelDTIM ? LSURWM : '0; assign FlushDCache = FlushDCacheM & ~(SelHPTW); @@ -332,7 +334,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), - .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheableOrFlushCacheM, + .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM)); From 574150e42753338c2cda4d27926b2d3083038011 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 27 Nov 2023 23:38:53 -0600 Subject: [PATCH 03/20] Fixed minor bug in the cbo hazard logic. 
--- src/ieu/controller.sv | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index a489b7f86..c9195132c 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -426,5 +426,8 @@ module controller import cvw::*; #(parameter cvw_t P) ( // atomic operations are also detected as MemRWD[1] //assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED))); // *** RT: Modify for ZICBOZ - assign StoreStallD = (MemRWE[0] | (|CMOpE & P.ZICBOM_SUPPORTED)) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | (|CMOpD & P.ZICBOM_SUPPORTED))); + logic cboD, cboE; + assign cboE = (|CMOpE[2:0] & P.ZICBOM_SUPPORTED) | (CMOpE[3] & P.ZICBOZ_SUPPORTED); + assign cboD = (|CMOpD[2:0] & P.ZICBOM_SUPPORTED) | (CMOpD[3] & P.ZICBOZ_SUPPORTED); + assign StoreStallD = (MemRWE[0] | cboE) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | cboD)); endmodule From c6f1e00c630f4254ec151444ac254b3498318fe5 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 00:05:12 -0600 Subject: [PATCH 04/20] Renamed signal in pmachecker. --- src/mmu/adrdecs.sv | 8 ++++---- src/mmu/pmachecker.sv | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index 69b3a8c1d..922b98efb 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -30,17 +30,17 @@ module adrdecs import cvw::*; #(parameter cvw_t P) ( input logic [P.PA_BITS-1:0] PhysicalAddress, - input logic AccessRW, AccessRX, AccessRWXZ, AccessRWZ, AccessRXZ, + input logic AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, input logic [1:0] Size, output logic [11:0] SelRegions ); localparam logic [3:0] SUPPORTED_SIZE = (P.LLEN == 32 ? 
4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed - adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRWZ, Size, SUPPORTED_SIZE, SelRegions[11]); - adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRXZ, Size, SUPPORTED_SIZE, SelRegions[10]); + adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRWC, Size, SUPPORTED_SIZE, SelRegions[11]); + adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[10]); adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[9]); - adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRXZ, Size, SUPPORTED_SIZE, SelRegions[8]); + adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[8]); adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec #(P.PA_BITS) clintdec(PhysicalAddress, P.CLINT_BASE[P.PA_BITS-1:0], P.CLINT_RANGE[P.PA_BITS-1:0], P.CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[6]); adrdec #(P.PA_BITS) gpiodec(PhysicalAddress, P.GPIO_BASE[P.PA_BITS-1:0], P.GPIO_RANGE[P.PA_BITS-1:0], P.GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[5]); diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index a48ccd241..016d4defe 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv 
@@ -44,20 +44,20 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( ); logic PMAAccessFault; - logic AccessRW, AccessRWXZ, AccessRX, AccessRWZ, AccessRXZ; + logic AccessRW, AccessRWXZ, AccessRX, AccessRWC, AccessRXC; logic [11:0] SelRegions; logic AtomicAllowed; logic CacheableRegion, IdempotentRegion; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWZ = AccessRW | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); + assign AccessRWC = AccessRW | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); assign AccessRWXZ = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); assign AccessRX = ReadAccessM | ExecuteAccessF; - assign AccessRXZ = AccessRX | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); + assign AccessRXC = AccessRX | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); // Determine which region of physical memory (if any) is being accessed - adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXZ, AccessRWZ, AccessRXZ, Size, SelRegions); + adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, Size, SelRegions); // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[9] | SelRegions[8] | SelRegions[7]; // exclusion-tag: unused-cachable From 0b72616ccedcd2990d401eb8d968187775d1df0c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 01:03:48 -0600 Subject: [PATCH 05/20] Oops. Introduced undetected bug into the cache's cbo instructions. 
--- src/cache/cachefsm.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index e7e5e0306..f7edfb733 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -129,7 +129,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; else NextState = STATE_READY; // exclusion-tag-start: icache case - STATE_WRITEBACK: if(CacheBusAck & ~CMOp[3]) NextState = STATE_FETCH; + STATE_WRITEBACK: if(CacheBusAck & ~(|CMOp[3:1])) NextState = STATE_FETCH; else if(CacheBusAck) NextState = STATE_READ_HOLD; else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. From 433fe75f1cafbc8b9cca33dda959166aea8f46da Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 14:18:06 -0600 Subject: [PATCH 06/20] Optimizations to cclsm. --- src/lsu/align.sv | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index ce704a316..a52ebdd02 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -65,12 +65,12 @@ module align import cvw::*; #(parameter cvw_t P) ( typedef enum logic [1:0] {STATE_READY, STATE_SPILL, STATE_STORE_DELAY} statetype; statetype CurrState, NextState; - logic TakeSpillM; + logic ValidSpillM; logic SpillM; logic SelSpillM; logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; - logic MisalignedM; + logic ValidMisalignedM, MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; @@ -78,7 +78,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; - logic SaveByteMask; logic HalfMisalignedM, WordMisalignedM; logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; @@ -118,7 
+117,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign WordMisalignedM = (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & HalfMisalignedM; assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & WordMisalignedM; - assign ValidAccess = (|MemRWM) & ~SelHPTW; + assign ValidAccess = (|MemRWM); if(P.LLEN == 64) begin logic DoubleSpillM; @@ -126,15 +125,16 @@ module align import cvw::*; #(parameter cvw_t P) ( assign DoubleMisalignedM = (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & DoubleMisalignedM; assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM | DoubleMisalignedM); - assign SpillM = ValidAccess & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); + assign SpillM = ValidAccess & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin - assign SpillM = ValidAccess & CacheableM & (HalfSpillM | WordSpillM); + assign SpillM = ValidAccess & (HalfSpillM | WordSpillM); assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM); end // align by shifting // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); + assign ValidSpillM = SpillM & ~CacheBusHPWTStall; + assign ValidMisalignedM = MisalignedM & ~SelHPTW; always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -142,8 +142,8 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillM & ~MemRWM[0]) NextState = STATE_SPILL; - else if(TakeSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; + STATE_READY: if (ValidSpillM & ~MemRWM[0]) NextState = STATE_SPILL; + else if(ValidSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; @@ -153,9 
+153,8 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign SelSpillM = (CurrState == STATE_SPILL | CurrState == STATE_STORE_DELAY); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); - assign SaveByteMask = (CurrState == STATE_READY & TakeSpillM); - assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; + assign SelSpillE = (CurrState == STATE_READY & ValidSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); + assign SpillSaveM = (CurrState == STATE_READY) & ValidSpillM & ~FlushM; assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); @@ -173,14 +172,14 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * AccessByteOffsetM : '0); + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (ValidMisalignedM ? 8 * AccessByteOffsetM : '0); assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * AccessByteOffsetM : '0); + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (ValidMisalignedM ? 
8 * AccessByteOffsetM : '0); assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill @@ -188,6 +187,6 @@ module align import cvw::*; #(parameter cvw_t P) ( {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, // spill, second half {SelSpillM, SelSpillE}, ByteMaskSpillM); - flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SaveByteMask, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); + flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SpillSaveM, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule From c690bdae533e22e16523f37bc2d0d78611df7424 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 14:19:30 -0600 Subject: [PATCH 07/20] More optimizations for cclsm. --- src/lsu/align.sv | 3 --- src/lsu/lsu.sv | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index a52ebdd02..1f7f50ec4 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -37,7 +37,6 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation input logic [1:0] MemRWM, - input logic CacheableM, input logic [P.LLEN*2-1:0] DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. 
Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request @@ -54,7 +53,6 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [1:0] MemRWSpillM, output logic SelStoreDelay, //*** this is bad. really don't like moving this outside output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic SpillStallM); @@ -157,7 +155,6 @@ module align import cvw::*; #(parameter cvw_t P) ( assign SpillSaveM = (CurrState == STATE_READY) & ValidSpillM & ~FlushM; assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? 
assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; - mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); //////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled data diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 6fe4377fc..81da2c075 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -159,10 +159,10 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin : ziccslm_align logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, - .MemRWM, .CacheableM, + .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .MemRWSpillM, .DCacheReadDataWordSpillM, .SpillStallM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; From d7acb01b3a1f75568c6d5305b4b116ad35d8ed89 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 14:21:37 -0600 Subject: [PATCH 08/20] Clean up. --- src/lsu/align.sv | 2 -- src/lsu/lsu.sv | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 1f7f50ec4..645054058 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -39,8 +39,6 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [1:0] MemRWM, input logic [P.LLEN*2-1:0] DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. 
Transition to second fetch of spill after the first is fetched - input logic DTLBMissM, // ITLB miss, ignore memory request - input logic DataUpdateDAM, // ITLB miss, ignore memory request input logic SelHPTW, input logic [(P.LLEN-1)/8:0] ByteMaskM, diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 81da2c075..2b8a65ac6 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -160,7 +160,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, - .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .SelHPTW, + .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM, .SelStoreDelay); From 58445037fca9ad6823a2725ae075239ef4b678bd Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 19:41:46 -0600 Subject: [PATCH 09/20] cclsm cleanup. --- src/lsu/align.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 645054058..00261ca24 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -79,6 +79,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; logic ValidAccess; /* verilator lint_off WIDTHEXPAND */ @@ -167,14 +168,15 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (ValidMisalignedM ? 8 * AccessByteOffsetM : '0); + assign ShiftAmount = ValidMisalignedM ? 
8 * AccessByteOffsetM : '0; + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (ValidMisalignedM ? 8 * AccessByteOffsetM : '0); + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill From ae910f1e4f846ebfd51a111591e9d2f9acc6b0c6 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 19:54:25 -0600 Subject: [PATCH 10/20] Removed unused hardware from alignment. 
--- src/lsu/align.sv | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 00261ca24..b3a20e9b4 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -65,22 +65,20 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SpillM; logic SelSpillM; logic SpillSaveM; - logic [P.LLEN-1:0] ReadDataWordFirstHalfM; + logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic ValidMisalignedM, MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; - logic [P.XLEN-1:0] IEUAdrIncrementM; + logic [P.XLEN-1:0] IEUAdrIncrementM; - logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; - logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; - logic HalfMisalignedM, WordMisalignedM; + logic HalfMisalignedM, WordMisalignedM; logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; - logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; - logic HalfSpillM, WordSpillM; - logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; - logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; - logic ValidAccess; + logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; + logic HalfSpillM, WordSpillM; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; + logic ValidAccess; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -179,11 +177,9 @@ module align import cvw::*; #(parameter cvw_t P) ( assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; - mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill + mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM}, // no spill {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half - {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, // spill, second half + {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, // spill, second half {SelSpillM, SelSpillE}, ByteMaskSpillM); - flopenr 
#(P.LLEN*2/8) bytemaskreg(clk, reset, SpillSaveM, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); - mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule From 3774fa073b1b63345576b04102f35a314c44e211 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 22:28:11 -0600 Subject: [PATCH 11/20] Simplification to alignment. --- src/lsu/align.sv | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b3a20e9b4..91eda1b8c 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -72,13 +72,10 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; - logic HalfMisalignedM, WordMisalignedM; - logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; - logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; - logic HalfSpillM, WordSpillM; logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; logic ValidAccess; + logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -95,36 +92,27 @@ module align import cvw::*; #(parameter cvw_t P) ( // 2) offset // 3) access location within the cacheline - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; - always_comb begin case (Funct3M[1:0]) 2'b00: AccessByteOffsetM = '0; // byte access - 2'b01: AccessByteOffsetM = {2'b00, ByteOffsetM[0]}; // half access - 2'b10: AccessByteOffsetM = {1'b0, ByteOffsetM[1:0]}; // word access - 2'b11: AccessByteOffsetM = ByteOffsetM; // double access - default: AccessByteOffsetM = ByteOffsetM; + 2'b01: AccessByteOffsetM = {2'b00, IEUAdrM[0]}; // half access + 2'b10: AccessByteOffsetM = {1'b0, IEUAdrM[1:0]}; // word access + 2'b11: AccessByteOffsetM = IEUAdrM[2:0]; // double access + default: AccessByteOffsetM = IEUAdrM[2:0]; + endcase + case (Funct3M[1:0]) + 2'b00: PotentialSpillM = '0; // byte access + 
2'b01: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:1] == '1; // half access + 2'b10: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:2] == '1; // word access + 2'b11: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:3] == '1; // double access + default: PotentialSpillM = '0; endcase end + assign MisalignedM = ValidAccess & (AccessByteOffsetM != '0); + assign SpillM = MisalignedM & PotentialSpillM; // compute misalignement - assign HalfMisalignedM = (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; - assign WordMisalignedM = (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; - assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & HalfMisalignedM; - assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & WordMisalignedM; assign ValidAccess = (|MemRWM); - - if(P.LLEN == 64) begin - logic DoubleSpillM; - logic DoubleMisalignedM; - assign DoubleMisalignedM = (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; - assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & DoubleMisalignedM; - assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM | DoubleMisalignedM); - assign SpillM = ValidAccess & (HalfSpillM | WordSpillM | DoubleSpillM); - end else begin - assign SpillM = ValidAccess & (HalfSpillM | WordSpillM); - assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM); - end // align by shifting // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits From 7eca28eaeeb36c8e471b949f8328e00884067a35 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 23:05:47 -0600 Subject: [PATCH 12/20] More cleanup. 
--- src/lsu/align.sv | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 91eda1b8c..a3e2dadac 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -66,7 +66,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SelSpillM; logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; - logic ValidMisalignedM, MisalignedM; + logic MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; @@ -92,6 +92,9 @@ module align import cvw::*; #(parameter cvw_t P) ( // 2) offset // 3) access location within the cacheline + assign ValidAccess = (|MemRWM); + + // compute misalignement always_comb begin case (Funct3M[1:0]) 2'b00: AccessByteOffsetM = '0; // byte access @@ -110,14 +113,8 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign MisalignedM = ValidAccess & (AccessByteOffsetM != '0); assign SpillM = MisalignedM & PotentialSpillM; - - // compute misalignement - assign ValidAccess = (|MemRWM); - // align by shifting - // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign ValidSpillM = SpillM & ~CacheBusHPWTStall; - assign ValidMisalignedM = MisalignedM & ~SelHPTW; + assign ValidSpillM = SpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -154,7 +151,7 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ShiftAmount = ValidMisalignedM ? 8 * AccessByteOffsetM : '0; + assign ShiftAmount = MisalignedM & ~SelHPTW ? 
{AccessByteOffsetM, 3'b0} : '0; // AND gate assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; From c056a6288f16f475a6bfbf64767923b167b8b019 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 23:28:50 -0600 Subject: [PATCH 13/20] Additional cleanup. --- src/lsu/align.sv | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index a3e2dadac..c618bb605 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -62,7 +62,6 @@ module align import cvw::*; #(parameter cvw_t P) ( statetype CurrState, NextState; logic ValidSpillM; - logic SpillM; logic SelSpillM; logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; @@ -74,7 +73,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; - logic ValidAccess; logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ @@ -92,8 +90,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // 2) offset // 3) access location within the cacheline - assign ValidAccess = (|MemRWM); - // compute misalignement always_comb begin case (Funct3M[1:0]) @@ -111,10 +107,9 @@ module align import cvw::*; #(parameter cvw_t P) ( default: PotentialSpillM = '0; endcase end - assign MisalignedM = ValidAccess & (AccessByteOffsetM != '0); - assign SpillM = MisalignedM & PotentialSpillM; + assign MisalignedM = (|MemRWM) & (AccessByteOffsetM != '0); - assign ValidSpillM = SpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall + assign ValidSpillM = MisalignedM & PotentialSpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; From 42085dfb9ff9d4264fe155ce68f66deb1533d280 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 12:26:18 -0600 Subject: [PATCH 14/20] 
Simplified pmachecker for cmo. --- src/mmu/adrdecs.sv | 12 ++++++------ src/mmu/pmachecker.sv | 10 ++++------ src/uncore/uncore.sv | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index 922b98efb..576bb21b8 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -30,18 +30,18 @@ module adrdecs import cvw::*; #(parameter cvw_t P) ( input logic [P.PA_BITS-1:0] PhysicalAddress, - input logic AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, + input logic AccessRW, AccessRX, AccessRWXC, input logic [1:0] Size, output logic [11:0] SelRegions ); localparam logic [3:0] SUPPORTED_SIZE = (P.LLEN == 32 ? 4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed - adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRWC, Size, SUPPORTED_SIZE, SelRegions[11]); - adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[10]); - adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[9]); - adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[8]); - adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[7]); + adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[11]); + adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, 
SelRegions[10]); + adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWXC, Size, SUPPORTED_SIZE, SelRegions[9]); + adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[8]); + adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWXC, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec #(P.PA_BITS) clintdec(PhysicalAddress, P.CLINT_BASE[P.PA_BITS-1:0], P.CLINT_RANGE[P.PA_BITS-1:0], P.CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[6]); adrdec #(P.PA_BITS) gpiodec(PhysicalAddress, P.GPIO_BASE[P.PA_BITS-1:0], P.GPIO_RANGE[P.PA_BITS-1:0], P.GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[5]); adrdec #(P.PA_BITS) uartdec(PhysicalAddress, P.UART_BASE[P.PA_BITS-1:0], P.UART_RANGE[P.PA_BITS-1:0], P.UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[4]); diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 016d4defe..f88479753 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -44,20 +44,18 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( ); logic PMAAccessFault; - logic AccessRW, AccessRWXZ, AccessRX, AccessRWC, AccessRXC; + logic AccessRW, AccessRWXC, AccessRX; logic [11:0] SelRegions; logic AtomicAllowed; logic CacheableRegion, IdempotentRegion; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWC = AccessRW | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); - assign AccessRWXZ = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); + assign AccessRWXC = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); assign AccessRX = ReadAccessM | ExecuteAccessF; - 
assign AccessRXC = AccessRX | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); // Determine which region of physical memory (if any) is being accessed - adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, Size, SelRegions); + adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXC, Size, SelRegions); // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[9] | SelRegions[8] | SelRegions[7]; // exclusion-tag: unused-cachable @@ -74,7 +72,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( assign SelTIM = SelRegions[11] | SelRegions[10]; // exclusion-tag: unused-idempotent // Detect access faults - assign PMAAccessFault = (SelRegions[0]) & AccessRWXZ | AtomicAccessM & ~AtomicAllowed; + assign PMAAccessFault = (SelRegions[0]) & AccessRWXC | AtomicAccessM & ~AtomicAllowed; assign PMAInstrAccessFaultF = ExecuteAccessF & PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM & PMAAccessFault; assign PMAStoreAmoAccessFaultM = (WriteAccessM | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & CMOp[3])) & PMAAccessFault; diff --git a/src/uncore/uncore.sv b/src/uncore/uncore.sv index ee93c2904..60d197f78 100644 --- a/src/uncore/uncore.sv +++ b/src/uncore/uncore.sv @@ -88,7 +88,7 @@ module uncore import cvw::*; #(parameter cvw_t P)( // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders // Set access types to all 1 as don't cares because the MMU has already done access checking - adrdecs #(P) adrdecs(HADDR, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); + adrdecs #(P) adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals assign {HSELDTIM, HSELIROM, HSELEXT, HSELBootRom, HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELEXTSDC, HSELSPI} = HSELRegions[11:1]; From 
e8aef432b8b28aeb74fbbd7f3791f9d5c03dd815 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 15:20:49 -0600 Subject: [PATCH 15/20] Cleaned up redundant ZICBOM/Z_SUPPORTED. --- src/cache/cachefsm.sv | 24 ++++++++++++------------ src/ieu/controller.sv | 10 ++++------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index f7edfb733..b8f2130f7 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -94,7 +94,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit - assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; + assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line assign CMOWriteback = CMOWritebackHit | CMOZeroEviction; @@ -130,7 +130,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, else NextState = STATE_READY; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOp[3:1])) NextState = STATE_FETCH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; + else if(CacheBusAck) NextState = STATE_READ_HOLD; // *** why not Ready? else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. 
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; @@ -154,24 +154,24 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache assign SetValid = CurrState == STATE_WRITE_LINE | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_READY & CMOZeroNoEviction) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); - assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0] & CacheHit) | - (CurrState == STATE_WRITEBACK & CMOp[2] & CacheBusAck)); + (CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); + assign ClearValid = (CurrState == STATE_READY & CMOp[0]) | + (CurrState == STATE_WRITEBACK & CMOp[2] & CacheBusAck); // coverage off -item e 1 -fecexprrow 8 assign LRUWriteEn = (((CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) | (CurrState == STATE_WRITE_LINE)) & ~FlushStage) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck); + (CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck); // exclusion-tag-start: icache flushdirtycontrols assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); + (CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. 
// Flush and eviction controls - (P.ZICBOM_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck); - assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOp[1] | CMOp[2])) | (P.ZICBOZ_SUPPORTED & CacheBusAck & CMOp[3]))) | - (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (P.ZICBOZ_SUPPORTED & CMOZeroNoEviction & ~CacheHit))) | + CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck; + assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOp[1] | CMOp[2])) | (CacheBusAck & CMOp[3]))) | + (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2] | ~CacheBusAck)) | (CurrState == STATE_READY & AnyMiss & LineDirty); @@ -194,7 +194,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | - (P.ZICBOM_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & ~CacheBusAck); + (CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & ~CacheBusAck); assign SelAdr = (CurrState == STATE_READY & (CacheRW[0] | AnyMiss | (|CMOp))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed (CurrState == STATE_FETCH) | diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index c9195132c..41be3941a 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -357,8 +357,10 @@ module controller import cvw::*; #(parameter cvw_t P) ( // Cache Management instructions always_comb begin CMOpD = 4'b0000; // default: not a cbo instruction - if ((P.ZICBOM_SUPPORTED | P.ZICBOZ_SUPPORTED) & CMOD) begin + if ((P.ZICBOZ_SUPPORTED) & CMOD) begin CMOpD[3] = (InstrD[31:20] == 12'd4); // cbo.zero + end + if ((P.ZICBOM_SUPPORTED) & CMOD) begin CMOpD[2] = 
(InstrD[31:20] == 12'd2); // cbo.clean CMOpD[1] = (InstrD[31:20] == 12'd1) | ((InstrD[31:20] == 12'd0) & (ENVCFG_CBE[1:0] == 2'b01)); // cbo.flush CMOpD[0] = (InstrD[31:20] == 12'd0) & (ENVCFG_CBE[1:0] == 2'b11); // cbo.inval @@ -425,9 +427,5 @@ module controller import cvw::*; #(parameter cvw_t P) ( // a cache cannot read or write immediately after a write // atomic operations are also detected as MemRWD[1] //assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED))); - // *** RT: Modify for ZICBOZ - logic cboD, cboE; - assign cboE = (|CMOpE[2:0] & P.ZICBOM_SUPPORTED) | (CMOpE[3] & P.ZICBOZ_SUPPORTED); - assign cboD = (|CMOpD[2:0] & P.ZICBOM_SUPPORTED) | (CMOpD[3] & P.ZICBOZ_SUPPORTED); - assign StoreStallD = (MemRWE[0] | cboE) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | cboD)); + assign StoreStallD = (MemRWE[0] | (|CMOpE)) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | (|CMOpD))); endmodule From 5216976da8fecbbaf94c2f0c6b82c58a4000035f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 15:39:39 -0600 Subject: [PATCH 16/20] Removed redundant ZICBOM/Z_SUPPORTED from pmachecker. 
--- src/mmu/pmachecker.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index f88479753..3c23d3623 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -51,7 +51,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWXC = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); + assign AccessRWXC = ReadAccessM | WriteAccessM | ExecuteAccessF | (|CMOp); assign AccessRX = ReadAccessM | ExecuteAccessF; // Determine which region of physical memory (if any) is being accessed @@ -75,5 +75,5 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( assign PMAAccessFault = (SelRegions[0]) & AccessRWXC | AtomicAccessM & ~AtomicAllowed; assign PMAInstrAccessFaultF = ExecuteAccessF & PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM & PMAAccessFault; - assign PMAStoreAmoAccessFaultM = (WriteAccessM | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & CMOp[3])) & PMAAccessFault; + assign PMAStoreAmoAccessFaultM = (WriteAccessM | (|CMOp)) & PMAAccessFault; endmodule From 04a996847ad73901d73c296ff2b9843baf02dbff Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 16:09:31 -0600 Subject: [PATCH 17/20] Added CMOp to pmp checker --- src/mmu/mmu.sv | 2 +- src/mmu/pmpchecker.sv | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 16016ac47..f9af52139 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -115,7 +115,7 @@ module mmu import cvw::*; #(parameter cvw_t P, if (P.PMP_ENTRIES > 0) begin : pmp pmpchecker #(P) pmpchecker(.PhysicalAddress, .PrivilegeModeW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .ExecuteAccessF, .WriteAccessM, .ReadAccessM, + .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .CMOp, .PMPInstrAccessFaultF, 
.PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM); end else begin assign PMPInstrAccessFaultF = 0; diff --git a/src/mmu/pmpchecker.sv b/src/mmu/pmpchecker.sv index 89c22c486..ddd7e72b0 100644 --- a/src/mmu/pmpchecker.sv +++ b/src/mmu/pmpchecker.sv @@ -42,6 +42,7 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, + input logic [3:0] CMOp, output logic PMPInstrAccessFaultF, output logic PMPLoadAccessFaultM, output logic PMPStoreAmoAccessFaultM @@ -53,6 +54,8 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( logic [P.PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. logic [P.PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set logic [P.PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + logic PMPCMOAccessFault, PMPCBOMAccessFault, PMPCBOZAccessFault; + if (P.PMP_ENTRIES > 0) begin: pmp // prevent complaints about array of no elements when PMP_ENTRIES = 0 pmpadrdec #(P) pmpadrdecs[P.PMP_ENTRIES-1:0]( @@ -69,7 +72,11 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( // Only enforce PMP checking for S and U modes or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW != P.M_MODE) | (|(L & FirstMatch)); // *** switch to this logic when PMP is initialized for non-machine mode + assign PMPCBOMAccessFault = EnforcePMP & (|CMOp[2:0]) & ~|((R|W) & FirstMatch) ; + assign PMPCBOZAccessFault = EnforcePMP & CMOp[3] & ~|(W & FirstMatch) ; + assign PMPCMOAccessFault = PMPCBOZAccessFault | PMPCBOMAccessFault; + assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; - assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; + assign PMPStoreAmoAccessFaultM = (EnforcePMP & WriteAccessM & ~|(W & 
FirstMatch)) | PMPCMOAccessFault; assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ; endmodule From 439f024bccb81026e3bbf378a972b986b0094bd7 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 16:20:43 -0600 Subject: [PATCH 18/20] Updates to tlb to check access permissions for cbo* --- src/mmu/mmu.sv | 2 +- src/mmu/tlb/tlb.sv | 3 ++- src/mmu/tlb/tlbcontrol.sv | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index f9af52139..e8d87503c 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -85,7 +85,7 @@ module mmu import cvw::*; #(parameter cvw_t P, .SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .SATP_ASID(SATP_REGW[P.ASID_BASE+P.ASID_BITS-1:P.ASID_BASE]), .VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_HADE, - .PrivilegeModeW, .ReadAccess, .WriteAccess, + .PrivilegeModeW, .ReadAccess, .WriteAccess, .CMOp, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, .Translate, .TLBPageFault, .UpdateDA, .PBMemoryType); diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv index 861e721b6..a5f95c70d 100644 --- a/src/mmu/tlb/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -62,6 +62,7 @@ module tlb import cvw::*; #(parameter cvw_t P, input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic ReadAccess, input logic WriteAccess, + input logic [3:0] CMOp, input logic DisableTranslation, input logic [P.XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) input logic [P.XLEN-1:0] PTE, // page table entry to write @@ -106,7 +107,7 @@ module tlb import cvw::*; #(parameter cvw_t P, assign VPN = VAdr[P.VPN_BITS+11:12]; tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_HADE, - .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, + 
.PrivilegeModeW, .ReadAccess, .WriteAccess, .CMOp, .DisableTranslation, .TLBFlush, .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, .UpdateDA, .SV39Mode, .Translate, .PTE_N, .PBMemoryType); diff --git a/src/mmu/tlb/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv index 31312f767..dd296b892 100644 --- a/src/mmu/tlb/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -35,6 +35,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( input logic ENVCFG_HADE, // HPTW A/D Update enable input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic ReadAccess, WriteAccess, + input logic [3:0] CMOp, input logic DisableTranslation, input logic TLBFlush, // Invalidate all TLB entries input logic [11:0] PTEAccessBits, @@ -67,7 +68,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( assign Translate = (SATP_MODE != P.NO_TRANSLATE[P.SVMODE_BITS-1:0]) & (EffectivePrivilegeMode != P.M_MODE) & ~DisableTranslation; // Determine whether TLB is being used - assign TLBAccess = ReadAccess | WriteAccess; + assign TLBAccess = ReadAccess | WriteAccess | (|CMOp); // Check that upper bits are legal (all 0s or all 1s) vm64check #(P) vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); @@ -98,6 +99,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( assign InvalidAccess = ~PTE_X; end else begin:dtlb // Data TLB fault checking logic InvalidRead, InvalidWrite; + logic InvalidCBOM, InvalidCBOZ; // User mode may only load/store from user mode pages, and supervisor mode // may only access user mode pages when STATUS_SUM is low. @@ -110,7 +112,9 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( // Check for write error. Writes are invalid when the page's write bit is // low. 
assign InvalidWrite = WriteAccess & ~PTE_W; - assign InvalidAccess = InvalidRead | InvalidWrite; + assign InvalidCBOM = (|CMOp[2:0]) & (~PTE_W | (~PTE_R & (~STATUS_MXR | ~PTE_X))); + assign InvalidCBOZ = CMOp[3] & ~PTE_W; + assign InvalidAccess = InvalidRead | InvalidWrite | InvalidCBOM | InvalidCBOZ; assign PreUpdateDA = ~PTE_A | WriteAccess & ~PTE_D; end From f0d886661688ca3b2802987c94ce4a01ed0732ae Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 17:35:26 -0600 Subject: [PATCH 19/20] LineDirty is either the Victim Way or the Flush way dirty, but never the hitway dirty. CBO instructions require hitway dirty. However we cannot mux hitway dirty into LineDirty without creating a combinational loop so we need a separate port. --- src/cache/cache.sv | 9 +++++---- src/cache/cachefsm.sv | 6 ++++-- src/cache/cacheway.sv | 6 ++++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index c527f0eae..80f5559cf 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -79,8 +79,8 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] HitWay, ValidWay; logic CacheHit; - logic [NUMWAYS-1:0] VictimWay, DirtyWay; - logic LineDirty; + logic [NUMWAYS-1:0] VictimWay, DirtyWay, HitWayDirtyWay; + logic LineDirty, HitWayLineDirty; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; logic [TAGLEN-1:0] Tag; logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1; @@ -116,7 +116,7 @@ module cache import cvw::*; #(parameter cvw_t P, cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, .SelWay, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .VictimWay, - .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); + .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, 
.DirtyWay, .HitWayDirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches if(NUMWAYS > 1) begin:vict @@ -128,6 +128,7 @@ module cache import cvw::*; #(parameter cvw_t P, assign CacheHit = |HitWay; assign LineDirty = |DirtyWay; + assign HitWayLineDirty = |HitWayDirtyWay; // ReadDataLineWay is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. @@ -218,7 +219,7 @@ module cache import cvw::*; #(parameter cvw_t P, cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .Stall, - .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, + .CacheHit, .LineDirty, .HitWayLineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .SelWay, .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index b8f2130f7..7136fe331 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -51,6 +51,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // cache internals input logic CacheHit, // Exactly 1 way hits input logic LineDirty, // The selected line and way is dirty + input logic HitWayLineDirty, // The cache hit way is dirty input logic FlushAdrFlag, // On last set of a cache flush input logic FlushWayFlag, // On the last way for any set of a cache flush output logic SelAdr, // [0] SRAM reads from NextAdr, [1] SRAM reads from PAdr @@ -94,7 +95,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit - assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? 
+ assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit & HitWayLineDirty; + //assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? FIXME assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line assign CMOWriteback = CMOWritebackHit | CMOZeroEviction; @@ -130,7 +132,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, else NextState = STATE_READY; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOp[3:1])) NextState = STATE_FETCH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; // *** why not Ready? + else if(CacheBusAck) NextState = STATE_READ_HOLD; // Read_hold lowers CacheStall else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 382d9ae6d..3f250d69a 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -51,7 +51,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, output logic [LINELEN-1:0] ReadDataLineWay,// This way's read data if valid output logic HitWay, // This way hits output logic ValidWay, // This way is valid - output logic DirtyWay, // This way is dirty + output logic HitWayDirtyWay, // The hit way is dirty + output logic DirtyWay , // The selected way is dirty output logic [TAGLEN-1:0] TagWay); // This way's tag if valid localparam WORDSPERLINE = LINELEN/XLEN; @@ -117,7 +118,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, // AND portion of distributed tag multiplexer assign TagWay = SelData ? 
ReadTag : '0; // AND part of AOMux - assign DirtyWay = SelDirty & Dirty & ValidWay; + assign HitWayDirtyWay = Dirty & ValidWay; + assign DirtyWay = SelDirty & HitWayDirtyWay; assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]); ///////////////////////////////////////////////////////////////////////////////////////////// From 5d744dd8553ca70fb02034a8ca94e2a0a7c730dd Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 19:44:59 -0600 Subject: [PATCH 20/20] Minor cleanup. --- src/cache/cachefsm.sv | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 7136fe331..b2a2ebf5a 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -96,7 +96,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit & HitWayLineDirty; - //assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? FIXME assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line assign CMOWriteback = CMOWritebackHit | CMOZeroEviction;