From 9290c3f95798df94d01bce762c41802307ca482a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 27 Nov 2023 20:57:33 -0600 Subject: [PATCH 01/20] Added correct cbo fault behavior. --- src/mmu/pmachecker.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 1ccf6501c..a48ccd241 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -77,5 +77,5 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( assign PMAAccessFault = (SelRegions[0]) & AccessRWXZ | AtomicAccessM & ~AtomicAllowed; assign PMAInstrAccessFaultF = ExecuteAccessF & PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM & PMAAccessFault; - assign PMAStoreAmoAccessFaultM = WriteAccessM & PMAAccessFault; + assign PMAStoreAmoAccessFaultM = (WriteAccessM | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & CMOp[3])) & PMAAccessFault; endmodule From 195def58080d68602520b8eab28e301776397390 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 27 Nov 2023 21:24:30 -0600 Subject: [PATCH 02/20] Extended the abhcacheinterface to zero a cacheline's worth of uncached memory on cbo.zero. --- src/ebu/ahbcacheinterface.sv | 9 ++++++--- src/ebu/buscachefsm.sv | 16 ++++++++++------ src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 8 +++++--- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 054022106..78b0d15e8 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -66,6 +66,7 @@ module ahbcacheinterface #( input logic [LLEN-1:0] WriteDataM, // IEU write data for uncached store input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write input logic [2:0] Funct3, // Size of uncached memory operation + input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache // lsu/ifu interface input logic Stall, // Core pipeline is stalled @@ -80,6 +81,7 @@ module ahbcacheinterface #( logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA logic [AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s logic [AHBW-1:0] PreHWDATA; // AHB Address phase write data + logic [PA_BITS-1:0] PAdrZero; genvar index; @@ -91,10 +93,11 @@ module ahbcacheinterface #( .q(FetchBuffer[(index+1)*AHBW-1:index*AHBW])); end - mux2 #(PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR); + assign PAdrZero = BusCMOZero ? {PAdr[PA_BITS-1:$clog2(LINELEN/8)], {$clog2(LINELEN/8){1'b0}}} : PAdr; + mux2 #(PA_BITS) localadrmux(PAdrZero, CacheBusAdr, Cacheable, LocalHADDR); assign HADDR = ({{PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(AHBW/8)) + LocalHADDR; - mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); + mux2 #(3) sizemux(.d0(Funct3), .d1(AHBW == 32 ? 3'b010 : 3'b011), .s(Cacheable | BusCMOZero), .y(HSIZE)); // When AHBW is less than LLEN need extra muxes to select the subword from cache's read data. logic [AHBW-1:0] CacheReadDataWordAHB; @@ -119,6 +122,6 @@ module ahbcacheinterface #( buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm( .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, - .CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed, + .CacheBusRW, .BusCMOZero, .CacheBusAck, .BeatCount, .BeatCountDelayed, .HREADY, .HTRANS, .HWRITE, .HBURST); endmodule diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 4d1d475d8..45f66762f 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -42,6 +42,7 @@ module buscachefsm #( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write + input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache output logic BusStall, // Bus is busy with an in flight memory operation output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt @@ -75,6 +76,9 @@ module buscachefsm #( logic BeatCntEn; logic BeatCntReset; logic CacheAccess; + logic BusWrite; + + assign BusWrite = CacheBusRW[0] | BusCMOZero; always_ff @(posedge HCLK) if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; @@ -83,18 +87,18 @@ module buscachefsm #( always_comb begin case(CurrState) ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; - else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; else NextState = ADR_PHASE; DATA_PHASE: if(HREADY) NextState = MEM3; else NextState = DATA_PHASE; MEM3: if(Stall) NextState = MEM3; else NextState = ADR_PHASE; - CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + CACHE_FETCH: if(HREADY & FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_FETCH; - CACHE_WRITEBACK: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + CACHE_WRITEBACK: if(HREADY & FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_WRITEBACK; @@ -128,7 +132,7 @@ module buscachefsm #( (CacheAccess & FinalBeatCount & |CacheBusRW & HREADY & ~Flush) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |BeatCount) ? (`BURST_EN ? AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; - assign HWRITE = (BusRW[0] | CacheBusRW[0] & ~Flush) | (CurrState == CACHE_WRITEBACK & |BeatCount); + assign HWRITE = (BusRW[0] | BusWrite & ~Flush) | (CurrState == CACHE_WRITEBACK & |BeatCount); assign HBURST = `BURST_EN & ((|CacheBusRW & ~Flush) | (CacheAccess & |BeatCount)) ? LocalBurstType : 3'b0; always_comb begin @@ -142,8 +146,8 @@ module buscachefsm #( end // communication to cache - assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount); - assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | CacheBusRW[0])) | + assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount & ~BusCMOZero); + assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | BusWrite)) | (CurrState == DATA_PHASE & BusRW[0]) | (CurrState == CACHE_WRITEBACK) | (CurrState == CACHE_FETCH); diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 7d61bd4db..4a02848b5 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -252,7 +252,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( ahbcacheinterface #(P.AHBW, P.LLEN, P.PA_BITS, WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1) ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), .HRDATA, - .Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), + .Flush(FlushD), .CacheBusRW, .BusCMOZero(1'b0), .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), .Funct3(3'b010), .HADDR(IFUHADDR), .HREADY(IFUHREADY), .HWRITE(IFUHWRITE), .CacheBusAdr(ICacheBusAdr), .BeatCount(), .Cacheable(CacheableF), .SelBusBeat(), .WriteDataM('0), .CacheBusAck(ICacheBusAck), .HWDATA(), .CacheableOrFlushCacheM(1'b0), .CacheReadDataWordM('0), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 2bb604d39..6fe4377fc 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -301,12 +301,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic FlushDCache; // Suppress d cache flush if there is an ITLB miss. logic CacheStall; logic [1:0] CacheBusRWTemp; + logic BusCMOZero; if(P.ZICBOZ_SUPPORTED) begin - assign BusRW = ~CacheableM & ~SelDTIM ? CMOpM[3] ? 2'b01 : LSURWM : '0; + assign BusCMOZero = CMOpM[3] & ~CacheableM; end else begin - assign BusRW = ~CacheableM & ~SelDTIM ? LSURWM : '0; + assign BusCMOZero = '0; end + assign BusRW = ~CacheableM & ~SelDTIM ? LSURWM : '0; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; assign CacheRWM = CacheableM & ~SelDTIM ? LSURWM : '0; assign FlushDCache = FlushDCacheM & ~(SelHPTW); @@ -332,7 +334,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), - .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheableOrFlushCacheM, + .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM)); From 69653e5faacff63c4165e0e4ef45a3a3bca7e881 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 27 Nov 2023 23:38:53 -0600 Subject: [PATCH 03/20] Fixed minor bug in the cbo hazard logic. --- src/ieu/controller.sv | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index a489b7f86..c9195132c 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -426,5 +426,8 @@ module controller import cvw::*; #(parameter cvw_t P) ( // atomic operations are also detected as MemRWD[1] //assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED))); // *** RT: Modify for ZICBOZ - assign StoreStallD = (MemRWE[0] | (|CMOpE & P.ZICBOM_SUPPORTED)) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | (|CMOpD & P.ZICBOM_SUPPORTED))); + logic cboD, cboE; + assign cboE = (|CMOpE[2:0] & P.ZICBOM_SUPPORTED) | (CMOpE[3] & P.ZICBOZ_SUPPORTED); + assign cboD = (|CMOpD[2:0] & P.ZICBOM_SUPPORTED) | (CMOpD[3] & P.ZICBOZ_SUPPORTED); + assign StoreStallD = (MemRWE[0] | cboE) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | cboD)); endmodule From 9a24a5d9570f846ffb2c555932cdb369ebac73a1 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 00:05:12 -0600 Subject: [PATCH 04/20] Renamed signal in pmachecker. --- src/mmu/adrdecs.sv | 8 ++++---- src/mmu/pmachecker.sv | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index 69b3a8c1d..922b98efb 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -30,17 +30,17 @@ module adrdecs import cvw::*; #(parameter cvw_t P) ( input logic [P.PA_BITS-1:0] PhysicalAddress, - input logic AccessRW, AccessRX, AccessRWXZ, AccessRWZ, AccessRXZ, + input logic AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, input logic [1:0] Size, output logic [11:0] SelRegions ); localparam logic [3:0] SUPPORTED_SIZE = (P.LLEN == 32 ? 4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed - adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRWZ, Size, SUPPORTED_SIZE, SelRegions[11]); - adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRXZ, Size, SUPPORTED_SIZE, SelRegions[10]); + adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRWC, Size, SUPPORTED_SIZE, SelRegions[11]); + adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[10]); adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[9]); - adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRXZ, Size, SUPPORTED_SIZE, SelRegions[8]); + adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[8]); adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec #(P.PA_BITS) clintdec(PhysicalAddress, P.CLINT_BASE[P.PA_BITS-1:0], P.CLINT_RANGE[P.PA_BITS-1:0], P.CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[6]); adrdec #(P.PA_BITS) gpiodec(PhysicalAddress, P.GPIO_BASE[P.PA_BITS-1:0], P.GPIO_RANGE[P.PA_BITS-1:0], P.GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[5]); diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index a48ccd241..016d4defe 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -44,20 +44,20 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( ); logic PMAAccessFault; - logic AccessRW, AccessRWXZ, AccessRX, AccessRWZ, AccessRXZ; + logic AccessRW, AccessRWXZ, AccessRX, AccessRWC, AccessRXC; logic [11:0] SelRegions; logic AtomicAllowed; logic CacheableRegion, IdempotentRegion; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWZ = AccessRW | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); + assign AccessRWC = AccessRW | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); assign AccessRWXZ = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); assign AccessRX = ReadAccessM | ExecuteAccessF; - assign AccessRXZ = AccessRX | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); + assign AccessRXC = AccessRX | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); // Determine which region of physical memory (if any) is being accessed - adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXZ, AccessRWZ, AccessRXZ, Size, SelRegions); + adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, Size, SelRegions); // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[9] | SelRegions[8] | SelRegions[7]; // exclusion-tag: unused-cachable From 0229df4a0ff383e876f13c7b1d74524933b714a8 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 01:03:48 -0600 Subject: [PATCH 05/20] Oups. Introduced undetected bug into the cache's cbo insructions. --- src/cache/cachefsm.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index e7e5e0306..f7edfb733 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -129,7 +129,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; else NextState = STATE_READY; // exclusion-tag-start: icache case - STATE_WRITEBACK: if(CacheBusAck & ~CMOp[3]) NextState = STATE_FETCH; + STATE_WRITEBACK: if(CacheBusAck & ~(|CMOp[3:1])) NextState = STATE_FETCH; else if(CacheBusAck) NextState = STATE_READ_HOLD; else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. From 4d4790ecf9a73ef93e628de619630a3110744bd7 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 14:18:06 -0600 Subject: [PATCH 06/20] Optimizations to cclsm. --- src/lsu/align.sv | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index ce704a316..a52ebdd02 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -65,12 +65,12 @@ module align import cvw::*; #(parameter cvw_t P) ( typedef enum logic [1:0] {STATE_READY, STATE_SPILL, STATE_STORE_DELAY} statetype; statetype CurrState, NextState; - logic TakeSpillM; + logic ValidSpillM; logic SpillM; logic SelSpillM; logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; - logic MisalignedM; + logic ValidMisalignedM, MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; @@ -78,7 +78,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; - logic SaveByteMask; logic HalfMisalignedM, WordMisalignedM; logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; @@ -118,7 +117,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign WordMisalignedM = (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & HalfMisalignedM; assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & WordMisalignedM; - assign ValidAccess = (|MemRWM) & ~SelHPTW; + assign ValidAccess = (|MemRWM); if(P.LLEN == 64) begin logic DoubleSpillM; @@ -126,15 +125,16 @@ module align import cvw::*; #(parameter cvw_t P) ( assign DoubleMisalignedM = (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & DoubleMisalignedM; assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM | DoubleMisalignedM); - assign SpillM = ValidAccess & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); + assign SpillM = ValidAccess & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin - assign SpillM = ValidAccess & CacheableM & (HalfSpillM | WordSpillM); + assign SpillM = ValidAccess & (HalfSpillM | WordSpillM); assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM); end // align by shifting // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); + assign ValidSpillM = SpillM & ~CacheBusHPWTStall; + assign ValidMisalignedM = MisalignedM & ~SelHPTW; always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -142,8 +142,8 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillM & ~MemRWM[0]) NextState = STATE_SPILL; - else if(TakeSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; + STATE_READY: if (ValidSpillM & ~MemRWM[0]) NextState = STATE_SPILL; + else if(ValidSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; @@ -153,9 +153,8 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign SelSpillM = (CurrState == STATE_SPILL | CurrState == STATE_STORE_DELAY); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); - assign SaveByteMask = (CurrState == STATE_READY & TakeSpillM); - assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; + assign SelSpillE = (CurrState == STATE_READY & ValidSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); + assign SpillSaveM = (CurrState == STATE_READY) & ValidSpillM & ~FlushM; assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); @@ -173,14 +172,14 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * AccessByteOffsetM : '0); + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (ValidMisalignedM ? 8 * AccessByteOffsetM : '0); assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * AccessByteOffsetM : '0); + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (ValidMisalignedM ? 8 * AccessByteOffsetM : '0); assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill @@ -188,6 +187,6 @@ module align import cvw::*; #(parameter cvw_t P) ( {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, // spill, second half {SelSpillM, SelSpillE}, ByteMaskSpillM); - flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SaveByteMask, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); + flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SpillSaveM, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule From df854280413d5f81c83f53d79413be29a94623ac Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 14:19:30 -0600 Subject: [PATCH 07/20] More optimizations for cclsm. --- src/lsu/align.sv | 3 --- src/lsu/lsu.sv | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index a52ebdd02..1f7f50ec4 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -37,7 +37,6 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation input logic [1:0] MemRWM, - input logic CacheableM, input logic [P.LLEN*2-1:0] DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request @@ -54,7 +53,6 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [1:0] MemRWSpillM, output logic SelStoreDelay, //*** this is bad. really don't like moving this outside output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic SpillStallM); @@ -157,7 +155,6 @@ module align import cvw::*; #(parameter cvw_t P) ( assign SpillSaveM = (CurrState == STATE_READY) & ValidSpillM & ~FlushM; assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; - mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); //////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled data diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 6fe4377fc..81da2c075 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -159,10 +159,10 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin : ziccslm_align logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, - .MemRWM, .CacheableM, + .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .MemRWSpillM, .DCacheReadDataWordSpillM, .SpillStallM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; From f4e77e96694722d678e551c2a92f1ac7b285ba0b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 14:21:37 -0600 Subject: [PATCH 08/20] Clean up. --- src/lsu/align.sv | 2 -- src/lsu/lsu.sv | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 1f7f50ec4..645054058 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -39,8 +39,6 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [1:0] MemRWM, input logic [P.LLEN*2-1:0] DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched - input logic DTLBMissM, // ITLB miss, ignore memory request - input logic DataUpdateDAM, // ITLB miss, ignore memory request input logic SelHPTW, input logic [(P.LLEN-1)/8:0] ByteMaskM, diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 81da2c075..2b8a65ac6 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -160,7 +160,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, - .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .SelHPTW, + .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM, .SelStoreDelay); From 865ebf8b9bb60dad52897dd42d06a9a1316ceb51 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 19:41:46 -0600 Subject: [PATCH 09/20] cclsm cleanup. --- src/lsu/align.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 645054058..00261ca24 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -79,6 +79,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; logic ValidAccess; /* verilator lint_off WIDTHEXPAND */ @@ -167,14 +168,15 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (ValidMisalignedM ? 8 * AccessByteOffsetM : '0); + assign ShiftAmount = ValidMisalignedM ? 8 * AccessByteOffsetM : '0; + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (ValidMisalignedM ? 8 * AccessByteOffsetM : '0); + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill From a69a70ba7fd8867a14a0ec30e643ae19fb14174b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 19:54:25 -0600 Subject: [PATCH 10/20] Removed unused hardware from alignment. --- src/lsu/align.sv | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 00261ca24..b3a20e9b4 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -65,22 +65,20 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SpillM; logic SelSpillM; logic SpillSaveM; - logic [P.LLEN-1:0] ReadDataWordFirstHalfM; + logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic ValidMisalignedM, MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; - logic [P.XLEN-1:0] IEUAdrIncrementM; + logic [P.XLEN-1:0] IEUAdrIncrementM; - logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; - logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; - logic HalfMisalignedM, WordMisalignedM; + logic HalfMisalignedM, WordMisalignedM; logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; - logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; - logic HalfSpillM, WordSpillM; - logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; - logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; - logic ValidAccess; + logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; + logic HalfSpillM, WordSpillM; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; + logic ValidAccess; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -179,11 +177,9 @@ module align import cvw::*; #(parameter cvw_t P) ( assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; - mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill + mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM}, // no spill {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half - {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, // spill, second half + {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, // spill, second half {SelSpillM, SelSpillE}, ByteMaskSpillM); - flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SpillSaveM, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); - mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule From 143c6ca4d190aff8b288429c15112a65256e3fe7 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 22:28:11 -0600 Subject: [PATCH 11/20] Simplification to alignment. --- src/lsu/align.sv | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b3a20e9b4..91eda1b8c 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -72,13 +72,10 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; - logic HalfMisalignedM, WordMisalignedM; - logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; - logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; - logic HalfSpillM, WordSpillM; logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; logic ValidAccess; + logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -95,36 +92,27 @@ module align import cvw::*; #(parameter cvw_t P) ( // 2) offset // 3) access location within the cacheline - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; - always_comb begin case (Funct3M[1:0]) 2'b00: AccessByteOffsetM = '0; // byte access - 2'b01: AccessByteOffsetM = {2'b00, ByteOffsetM[0]}; // half access - 2'b10: AccessByteOffsetM = {1'b0, ByteOffsetM[1:0]}; // word access - 2'b11: AccessByteOffsetM = ByteOffsetM; // double access - default: AccessByteOffsetM = ByteOffsetM; + 2'b01: AccessByteOffsetM = {2'b00, IEUAdrM[0]}; // half access + 2'b10: AccessByteOffsetM = {1'b0, IEUAdrM[1:0]}; // word access + 2'b11: AccessByteOffsetM = IEUAdrM[2:0]; // double access + default: AccessByteOffsetM = IEUAdrM[2:0]; + endcase + case (Funct3M[1:0]) + 2'b00: PotentialSpillM = '0; // byte access + 2'b01: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:1] == '1; // half access + 2'b10: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:2] == '1; // word access + 2'b11: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:3] == '1; // double access + default: PotentialSpillM = '0; endcase end + assign MisalignedM = ValidAccess & (AccessByteOffsetM != '0); + assign SpillM = MisalignedM & PotentialSpillM; // compute misalignement - assign HalfMisalignedM = (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; - assign WordMisalignedM = (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; - assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & HalfMisalignedM; - assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & WordMisalignedM; assign ValidAccess = (|MemRWM); - - if(P.LLEN == 64) begin - logic DoubleSpillM; - logic DoubleMisalignedM; - assign DoubleMisalignedM = (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; - assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & DoubleMisalignedM; - assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM | DoubleMisalignedM); - assign SpillM = ValidAccess & (HalfSpillM | WordSpillM | DoubleSpillM); - end else begin - assign SpillM = ValidAccess & (HalfSpillM | WordSpillM); - assign MisalignedM = ValidAccess & (HalfMisalignedM | WordMisalignedM); - end // align by shifting // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits From 4149ae6c11aeb58198e8a8d89ace92810bfa82a0 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 23:05:47 -0600 Subject: [PATCH 12/20] More cleanup. --- src/lsu/align.sv | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 91eda1b8c..a3e2dadac 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -66,7 +66,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SelSpillM; logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; - logic ValidMisalignedM, MisalignedM; + logic MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; @@ -92,6 +92,9 @@ module align import cvw::*; #(parameter cvw_t P) ( // 2) offset // 3) access location within the cacheline + assign ValidAccess = (|MemRWM); + + // compute misalignement always_comb begin case (Funct3M[1:0]) 2'b00: AccessByteOffsetM = '0; // byte access @@ -110,14 +113,8 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign MisalignedM = ValidAccess & (AccessByteOffsetM != '0); assign SpillM = MisalignedM & PotentialSpillM; - - // compute misalignement - assign ValidAccess = (|MemRWM); - // align by shifting - // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign ValidSpillM = SpillM & ~CacheBusHPWTStall; - assign ValidMisalignedM = MisalignedM & ~SelHPTW; + assign ValidSpillM = SpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -154,7 +151,7 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ShiftAmount = ValidMisalignedM ? 8 * AccessByteOffsetM : '0; + assign ShiftAmount = MisalignedM & ~SelHPTW ? {AccessByteOffsetM, 3'b0} : '0; // AND gate assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; From d29b2b95f7b059063b912f2ec9754f5c6032038a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 28 Nov 2023 23:28:50 -0600 Subject: [PATCH 13/20] Additional cleanup. --- src/lsu/align.sv | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index a3e2dadac..c618bb605 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -62,7 +62,6 @@ module align import cvw::*; #(parameter cvw_t P) ( statetype CurrState, NextState; logic ValidSpillM; - logic SpillM; logic SelSpillM; logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; @@ -74,7 +73,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; - logic ValidAccess; logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ @@ -92,8 +90,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // 2) offset // 3) access location within the cacheline - assign ValidAccess = (|MemRWM); - // compute misalignement always_comb begin case (Funct3M[1:0]) @@ -111,10 +107,9 @@ module align import cvw::*; #(parameter cvw_t P) ( default: PotentialSpillM = '0; endcase end - assign MisalignedM = ValidAccess & (AccessByteOffsetM != '0); - assign SpillM = MisalignedM & PotentialSpillM; + assign MisalignedM = (|MemRWM) & (AccessByteOffsetM != '0); - assign ValidSpillM = SpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall + assign ValidSpillM = MisalignedM & PotentialSpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; From 053b0946201921c928e1e6b5bcec34e9dfada1f9 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 12:26:18 -0600 Subject: [PATCH 14/20] Simpilified pmachecker for cmo. --- src/mmu/adrdecs.sv | 12 ++++++------ src/mmu/pmachecker.sv | 10 ++++------ src/uncore/uncore.sv | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/mmu/adrdecs.sv b/src/mmu/adrdecs.sv index 922b98efb..576bb21b8 100644 --- a/src/mmu/adrdecs.sv +++ b/src/mmu/adrdecs.sv @@ -30,18 +30,18 @@ module adrdecs import cvw::*; #(parameter cvw_t P) ( input logic [P.PA_BITS-1:0] PhysicalAddress, - input logic AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, + input logic AccessRW, AccessRX, AccessRWXC, input logic [1:0] Size, output logic [11:0] SelRegions ); localparam logic [3:0] SUPPORTED_SIZE = (P.LLEN == 32 ? 4'b0111 : 4'b1111); // Determine which region of physical memory (if any) is being accessed - adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRWC, Size, SUPPORTED_SIZE, SelRegions[11]); - adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[10]); - adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[9]); - adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRXC, Size, SUPPORTED_SIZE, SelRegions[8]); - adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWXZ, Size, SUPPORTED_SIZE, SelRegions[7]); + adrdec #(P.PA_BITS) dtimdec(PhysicalAddress, P.DTIM_BASE[P.PA_BITS-1:0], P.DTIM_RANGE[P.PA_BITS-1:0], P.DTIM_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[11]); + adrdec #(P.PA_BITS) iromdec(PhysicalAddress, P.IROM_BASE[P.PA_BITS-1:0], P.IROM_RANGE[P.PA_BITS-1:0], P.IROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[10]); + adrdec #(P.PA_BITS) ddr4dec(PhysicalAddress, P.EXT_MEM_BASE[P.PA_BITS-1:0], P.EXT_MEM_RANGE[P.PA_BITS-1:0], P.EXT_MEM_SUPPORTED, AccessRWXC, Size, SUPPORTED_SIZE, SelRegions[9]); + adrdec #(P.PA_BITS) bootromdec(PhysicalAddress, P.BOOTROM_BASE[P.PA_BITS-1:0], P.BOOTROM_RANGE[P.PA_BITS-1:0], P.BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[8]); + adrdec #(P.PA_BITS) uncoreramdec(PhysicalAddress, P.UNCORE_RAM_BASE[P.PA_BITS-1:0], P.UNCORE_RAM_RANGE[P.PA_BITS-1:0], P.UNCORE_RAM_SUPPORTED, AccessRWXC, Size, SUPPORTED_SIZE, SelRegions[7]); adrdec #(P.PA_BITS) clintdec(PhysicalAddress, P.CLINT_BASE[P.PA_BITS-1:0], P.CLINT_RANGE[P.PA_BITS-1:0], P.CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[6]); adrdec #(P.PA_BITS) gpiodec(PhysicalAddress, P.GPIO_BASE[P.PA_BITS-1:0], P.GPIO_RANGE[P.PA_BITS-1:0], P.GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[5]); adrdec #(P.PA_BITS) uartdec(PhysicalAddress, P.UART_BASE[P.PA_BITS-1:0], P.UART_RANGE[P.PA_BITS-1:0], P.UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[4]); diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 016d4defe..f88479753 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -44,20 +44,18 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( ); logic PMAAccessFault; - logic AccessRW, AccessRWXZ, AccessRX, AccessRWC, AccessRXC; + logic AccessRW, AccessRWXC, AccessRX; logic [11:0] SelRegions; logic AtomicAllowed; logic CacheableRegion, IdempotentRegion; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWC = AccessRW | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); - assign AccessRWXZ = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); + assign AccessRWXC = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); assign AccessRX = ReadAccessM | ExecuteAccessF; - assign AccessRXC = AccessRX | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])); // Determine which region of physical memory (if any) is being accessed - adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXZ, AccessRWC, AccessRXC, Size, SelRegions); + adrdecs #(P) adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWXC, Size, SelRegions); // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[9] | SelRegions[8] | SelRegions[7]; // exclusion-tag: unused-cachable @@ -74,7 +72,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( assign SelTIM = SelRegions[11] | SelRegions[10]; // exclusion-tag: unused-idempotent // Detect access faults - assign PMAAccessFault = (SelRegions[0]) & AccessRWXZ | AtomicAccessM & ~AtomicAllowed; + assign PMAAccessFault = (SelRegions[0]) & AccessRWXC | AtomicAccessM & ~AtomicAllowed; assign PMAInstrAccessFaultF = ExecuteAccessF & PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM & PMAAccessFault; assign PMAStoreAmoAccessFaultM = (WriteAccessM | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & CMOp[3])) & PMAAccessFault; diff --git a/src/uncore/uncore.sv b/src/uncore/uncore.sv index ee93c2904..60d197f78 100644 --- a/src/uncore/uncore.sv +++ b/src/uncore/uncore.sv @@ -88,7 +88,7 @@ module uncore import cvw::*; #(parameter cvw_t P)( // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders // Set access types to all 1 as don't cares because the MMU has already done access checking - adrdecs #(P) adrdecs(HADDR, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); + adrdecs #(P) adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals assign {HSELDTIM, HSELIROM, HSELEXT, HSELBootRom, HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELEXTSDC, HSELSPI} = HSELRegions[11:1]; From 80336493f5f16329a03768f71564381bb5f9e126 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 15:20:49 -0600 Subject: [PATCH 15/20] Cleaned up redundant ZICBOM/Z_SUPPORTED. --- src/cache/cachefsm.sv | 24 ++++++++++++------------ src/ieu/controller.sv | 10 ++++------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index f7edfb733..b8f2130f7 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -94,7 +94,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit - assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; + assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line assign CMOWriteback = CMOWritebackHit | CMOZeroEviction; @@ -130,7 +130,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, else NextState = STATE_READY; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOp[3:1])) NextState = STATE_FETCH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; + else if(CacheBusAck) NextState = STATE_READ_HOLD; // *** why not Ready? else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; @@ -154,24 +154,24 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache assign SetValid = CurrState == STATE_WRITE_LINE | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_READY & CMOZeroNoEviction) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); - assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0] & CacheHit) | - (CurrState == STATE_WRITEBACK & CMOp[2] & CacheBusAck)); + (CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); + assign ClearValid = (CurrState == STATE_READY & CMOp[0]) | + (CurrState == STATE_WRITEBACK & CMOp[2] & CacheBusAck); // coverage off -item e 1 -fecexprrow 8 assign LRUWriteEn = (((CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) | (CurrState == STATE_WRITE_LINE)) & ~FlushStage) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck); + (CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck); // exclusion-tag-start: icache flushdirtycontrols assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | - (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); + (CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. // Flush and eviction controls - (P.ZICBOM_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck); - assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOp[1] | CMOp[2])) | (P.ZICBOZ_SUPPORTED & CacheBusAck & CMOp[3]))) | - (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (P.ZICBOZ_SUPPORTED & CMOZeroNoEviction & ~CacheHit))) | + CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck; + assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOp[1] | CMOp[2])) | (CacheBusAck & CMOp[3]))) | + (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2] | ~CacheBusAck)) | (CurrState == STATE_READY & AnyMiss & LineDirty); @@ -194,7 +194,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | - (P.ZICBOM_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & ~CacheBusAck); + (CurrState == STATE_WRITEBACK & (CMOp[1] | CMOp[2]) & ~CacheBusAck); assign SelAdr = (CurrState == STATE_READY & (CacheRW[0] | AnyMiss | (|CMOp))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed (CurrState == STATE_FETCH) | diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index c9195132c..41be3941a 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -357,8 +357,10 @@ module controller import cvw::*; #(parameter cvw_t P) ( // Cache Management instructions always_comb begin CMOpD = 4'b0000; // default: not a cbo instruction - if ((P.ZICBOM_SUPPORTED | P.ZICBOZ_SUPPORTED) & CMOD) begin + if ((P.ZICBOZ_SUPPORTED) & CMOD) begin CMOpD[3] = (InstrD[31:20] == 12'd4); // cbo.zero + end + if ((P.ZICBOM_SUPPORTED) & CMOD) begin CMOpD[2] = (InstrD[31:20] == 12'd2); // cbo.clean CMOpD[1] = (InstrD[31:20] == 12'd1) | ((InstrD[31:20] == 12'd0) & (ENVCFG_CBE[1:0] == 2'b01)); // cbo.flush CMOpD[0] = (InstrD[31:20] == 12'd0) & (ENVCFG_CBE[1:0] == 2'b11); // cbo.inval @@ -425,9 +427,5 @@ module controller import cvw::*; #(parameter cvw_t P) ( // a cache cannot read or write immediately after a write // atomic operations are also detected as MemRWD[1] //assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED))); - // *** RT: Modify for ZICBOZ - logic cboD, cboE; - assign cboE = (|CMOpE[2:0] & P.ZICBOM_SUPPORTED) | (CMOpE[3] & P.ZICBOZ_SUPPORTED); - assign cboD = (|CMOpD[2:0] & P.ZICBOM_SUPPORTED) | (CMOpD[3] & P.ZICBOZ_SUPPORTED); - assign StoreStallD = (MemRWE[0] | cboE) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | cboD)); + assign StoreStallD = (MemRWE[0] | (|CMOpE)) & ((MemRWD[1] | (MemRWD[0] & P.DCACHE_SUPPORTED) | (|CMOpD))); endmodule From fc04b6f7d87319ec8e601a6e9e1261bd84acc359 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 15:39:39 -0600 Subject: [PATCH 16/20] Removed redundant ZICBOM/Z_SUPPORTED from pmachecker. --- src/mmu/pmachecker.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index f88479753..3c23d3623 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -51,7 +51,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; - assign AccessRWXC = ReadAccessM | WriteAccessM | ExecuteAccessF | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & (CMOp[3])); + assign AccessRWXC = ReadAccessM | WriteAccessM | ExecuteAccessF | (|CMOp); assign AccessRX = ReadAccessM | ExecuteAccessF; // Determine which region of physical memory (if any) is being accessed @@ -75,5 +75,5 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( assign PMAAccessFault = (SelRegions[0]) & AccessRWXC | AtomicAccessM & ~AtomicAllowed; assign PMAInstrAccessFaultF = ExecuteAccessF & PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM & PMAAccessFault; - assign PMAStoreAmoAccessFaultM = (WriteAccessM | (P.ZICBOM_SUPPORTED & (|CMOp[2:0])) | (P.ZICBOZ_SUPPORTED & CMOp[3])) & PMAAccessFault; + assign PMAStoreAmoAccessFaultM = (WriteAccessM | (|CMOp)) & PMAAccessFault; endmodule From f4e4aac8b5c78120e1f62a8fbc03470903b389ff Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 16:09:31 -0600 Subject: [PATCH 17/20] Added CMOp to pmp checker --- src/mmu/mmu.sv | 2 +- src/mmu/pmpchecker.sv | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 16016ac47..f9af52139 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -115,7 +115,7 @@ module mmu import cvw::*; #(parameter cvw_t P, if (P.PMP_ENTRIES > 0) begin : pmp pmpchecker #(P) pmpchecker(.PhysicalAddress, .PrivilegeModeW, .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .ExecuteAccessF, .WriteAccessM, .ReadAccessM, + .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .CMOp, .PMPInstrAccessFaultF, .PMPLoadAccessFaultM, .PMPStoreAmoAccessFaultM); end else begin assign PMPInstrAccessFaultF = 0; diff --git a/src/mmu/pmpchecker.sv b/src/mmu/pmpchecker.sv index 89c22c486..ddd7e72b0 100644 --- a/src/mmu/pmpchecker.sv +++ b/src/mmu/pmpchecker.sv @@ -42,6 +42,7 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW [P.PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, + input logic [3:0] CMOp, output logic PMPInstrAccessFaultF, output logic PMPLoadAccessFaultM, output logic PMPStoreAmoAccessFaultM @@ -53,6 +54,8 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( logic [P.PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address. logic [P.PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set logic [P.PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + logic PMPCMOAccessFault, PMPCBOMAccessFault, PMPCBOZAccessFault; + if (P.PMP_ENTRIES > 0) begin: pmp // prevent complaints about array of no elements when PMP_ENTRIES = 0 pmpadrdec #(P) pmpadrdecs[P.PMP_ENTRIES-1:0]( @@ -69,7 +72,11 @@ module pmpchecker import cvw::*; #(parameter cvw_t P) ( // Only enforce PMP checking for S and U modes or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW != P.M_MODE) | (|(L & FirstMatch)); // *** switch to this logic when PMP is initialized for non-machine mode + assign PMPCBOMAccessFault = EnforcePMP & (|CMOp[2:0]) & ~|((R|W) & FirstMatch) ; + assign PMPCBOZAccessFault = EnforcePMP & CMOp[3] & ~|(W & FirstMatch) ; + assign PMPCMOAccessFault = PMPCBOZAccessFault | PMPCBOMAccessFault; + assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; - assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; + assign PMPStoreAmoAccessFaultM = (EnforcePMP & WriteAccessM & ~|(W & FirstMatch)) | PMPCMOAccessFault; assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ; endmodule From f11f88ac2b78bda02cf83a20d4f28346bdfc565d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 16:20:43 -0600 Subject: [PATCH 18/20] Updates to tlb to check access permissions for cbo* --- src/mmu/mmu.sv | 2 +- src/mmu/tlb/tlb.sv | 3 ++- src/mmu/tlb/tlbcontrol.sv | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index f9af52139..e8d87503c 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -85,7 +85,7 @@ module mmu import cvw::*; #(parameter cvw_t P, .SATP_MODE(SATP_REGW[P.XLEN-1:P.XLEN-P.SVMODE_BITS]), .SATP_ASID(SATP_REGW[P.ASID_BASE+P.ASID_BITS-1:P.ASID_BASE]), .VAdr(VAdr[P.XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_HADE, - .PrivilegeModeW, .ReadAccess, .WriteAccess, + .PrivilegeModeW, .ReadAccess, .WriteAccess, .CMOp, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, .Translate, .TLBPageFault, .UpdateDA, .PBMemoryType); diff --git a/src/mmu/tlb/tlb.sv b/src/mmu/tlb/tlb.sv index 861e721b6..a5f95c70d 100644 --- a/src/mmu/tlb/tlb.sv +++ b/src/mmu/tlb/tlb.sv @@ -62,6 +62,7 @@ module tlb import cvw::*; #(parameter cvw_t P, input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic ReadAccess, input logic WriteAccess, + input logic [3:0] CMOp, input logic DisableTranslation, input logic [P.XLEN-1:0] VAdr, // address input before translation (could be physical or virtual) input logic [P.XLEN-1:0] PTE, // page table entry to write @@ -106,7 +107,7 @@ module tlb import cvw::*; #(parameter cvw_t P, assign VPN = VAdr[P.VPN_BITS+11:12]; tlbcontrol #(P, ITLB) tlbcontrol(.SATP_MODE, .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_HADE, - .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .TLBFlush, + .PrivilegeModeW, .ReadAccess, .WriteAccess, .CMOp, .DisableTranslation, .TLBFlush, .PTEAccessBits, .CAMHit, .Misaligned, .TLBMiss, .TLBHit, .TLBPageFault, .UpdateDA, .SV39Mode, .Translate, .PTE_N, .PBMemoryType); diff --git a/src/mmu/tlb/tlbcontrol.sv b/src/mmu/tlb/tlbcontrol.sv index 31312f767..dd296b892 100644 --- a/src/mmu/tlb/tlbcontrol.sv +++ b/src/mmu/tlb/tlbcontrol.sv @@ -35,6 +35,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( input logic ENVCFG_HADE, // HPTW A/D Update enable input logic [1:0] PrivilegeModeW, // Current privilege level of the processeor input logic ReadAccess, WriteAccess, + input logic [3:0] CMOp, input logic DisableTranslation, input logic TLBFlush, // Invalidate all TLB entries input logic [11:0] PTEAccessBits, @@ -67,7 +68,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( assign Translate = (SATP_MODE != P.NO_TRANSLATE[P.SVMODE_BITS-1:0]) & (EffectivePrivilegeMode != P.M_MODE) & ~DisableTranslation; // Determine whether TLB is being used - assign TLBAccess = ReadAccess | WriteAccess; + assign TLBAccess = ReadAccess | WriteAccess | (|CMOp); // Check that upper bits are legal (all 0s or all 1s) vm64check #(P) vm64check(.SATP_MODE, .VAdr, .SV39Mode, .UpperBitsUnequal); @@ -98,6 +99,7 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( assign InvalidAccess = ~PTE_X; end else begin:dtlb // Data TLB fault checking logic InvalidRead, InvalidWrite; + logic InvalidCBOM, InvalidCBOZ; // User mode may only load/store from user mode pages, and supervisor mode // may only access user mode pages when STATUS_SUM is low. @@ -110,7 +112,9 @@ module tlbcontrol import cvw::*; #(parameter cvw_t P, ITLB = 0) ( // Check for write error. Writes are invalid when the page's write bit is // low. assign InvalidWrite = WriteAccess & ~PTE_W; - assign InvalidAccess = InvalidRead | InvalidWrite; + assign InvalidCBOM = (|CMOp[2:0]) & (~PTE_W | (~PTE_R & (~STATUS_MXR | ~PTE_X))); + assign InvalidCBOZ = CMOp[3] & ~PTE_W; + assign InvalidAccess = InvalidRead | InvalidWrite | InvalidCBOM | InvalidCBOZ; assign PreUpdateDA = ~PTE_A | WriteAccess & ~PTE_D; end From ab68a76e77834368d6f7d79ff47f35503584ee02 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 17:35:26 -0600 Subject: [PATCH 19/20] LineDirty is either the Victim Way or the Flush way dirty, but never the hitway dirty. CBO instructions require hitway dirty. However we cannot mux hitway dirty into LineDirty wihtout creating a combinational loop so we need a separate port. --- src/cache/cache.sv | 9 +++++---- src/cache/cachefsm.sv | 6 ++++-- src/cache/cacheway.sv | 6 ++++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index c527f0eae..80f5559cf 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -79,8 +79,8 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] HitWay, ValidWay; logic CacheHit; - logic [NUMWAYS-1:0] VictimWay, DirtyWay; - logic LineDirty; + logic [NUMWAYS-1:0] VictimWay, DirtyWay, HitWayDirtyWay; + logic LineDirty, HitWayLineDirty; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; logic [TAGLEN-1:0] Tag; logic [SETLEN-1:0] FlushAdr, NextFlushAdr, FlushAdrP1; @@ -116,7 +116,7 @@ module cache import cvw::*; #(parameter cvw_t P, cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, .SelWay, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .VictimWay, - .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); + .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .HitWayDirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches if(NUMWAYS > 1) begin:vict @@ -128,6 +128,7 @@ module cache import cvw::*; #(parameter cvw_t P, assign CacheHit = |HitWay; assign LineDirty = |DirtyWay; + assign HitWayLineDirty = |HitWayDirtyWay; // ReadDataLineWay is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. @@ -218,7 +219,7 @@ module cache import cvw::*; #(parameter cvw_t P, cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .Stall, - .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, + .CacheHit, .LineDirty, .HitWayLineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .SelWay, .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index b8f2130f7..7136fe331 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -51,6 +51,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // cache internals input logic CacheHit, // Exactly 1 way hits input logic LineDirty, // The selected line and way is dirty + input logic HitWayLineDirty, // The cache hit way is dirty input logic FlushAdrFlag, // On last set of a cache flush input logic FlushWayFlag, // On the last way for any set of a cache flush output logic SelAdr, // [0] SRAM reads from NextAdr, [1] SRAM reads from PAdr @@ -94,7 +95,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit - assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? + assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit & HitWayLineDirty; + //assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? FIXME assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line assign CMOWriteback = CMOWritebackHit | CMOZeroEviction; @@ -130,7 +132,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, else NextState = STATE_READY; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOp[3:1])) NextState = STATE_FETCH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; // *** why not Ready? + else if(CacheBusAck) NextState = STATE_READ_HOLD; // Read_hold lowers CacheStall else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 382d9ae6d..3f250d69a 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -51,7 +51,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, output logic [LINELEN-1:0] ReadDataLineWay,// This way's read data if valid output logic HitWay, // This way hits output logic ValidWay, // This way is valid - output logic DirtyWay, // This way is dirty + output logic HitWayDirtyWay, // The hit way is dirty + output logic DirtyWay , // The selected way is dirty output logic [TAGLEN-1:0] TagWay); // This way's tag if valid localparam WORDSPERLINE = LINELEN/XLEN; @@ -117,7 +118,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, // AND portion of distributed tag multiplexer assign TagWay = SelData ? ReadTag : '0; // AND part of AOMux - assign DirtyWay = SelDirty & Dirty & ValidWay; + assign HitWayDirtyWay = Dirty & ValidWay; + assign DirtyWay = SelDirty & HitWayDirtyWay; assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]); ///////////////////////////////////////////////////////////////////////////////////////////// From 025b04ae8b4d74dccfb3a30091bd2f826f472c70 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 29 Nov 2023 19:44:59 -0600 Subject: [PATCH 20/20] Minior cleanup. --- src/cache/cachefsm.sv | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 7136fe331..b2a2ebf5a 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -96,7 +96,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit & HitWayLineDirty; - //assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; // *** why does this not include dirty? FIXME assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line assign CMOWriteback = CMOWritebackHit | CMOZeroEviction;