diff --git a/sim/Makefile b/sim/Makefile index 658cc6a15..5889d1df9 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -18,7 +18,6 @@ all: riscoftests memfiles coveragetests wally-riscv-arch-test: wallyriscoftests memfiles coverage: cov/rv64gc_arch64i.ucdb - #make -C ../tests/coverage --jobs #iter-elf.bash --cover --search ../tests/coverage vcover merge -out cov/cov.ucdb cov/rv64gc_arch64i.ucdb cov/rv64gc*.ucdb -logfile cov/log # vcover merge -out cov/cov.ucdb cov/rv64gc_arch64i.ucdb cov/rv64gc*.ucdb cov/buildroot_buildroot.ucdb riscv.ucdb -logfile cov/log @@ -60,4 +59,4 @@ memfiles: make -f makefile-memfile wally-sim-files --jobs coveragetests: - make -C ../tests/coverage/ + make -C ../tests/coverage/ --jobs diff --git a/src/cache/cache.sv b/src/cache/cache.sv index b6340a541..c8f707904 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -34,7 +34,6 @@ module cache import cvw::*; #(parameter cvw_t P, input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) // cpu side - input logic [1:0] CacheRWNext, // [1] Read, [0] Write input logic [1:0] CacheRW, // [1] Read, [0] Write input logic FlushCache, // Flush all dirty lines back to memory input logic InvalidateCache, // Clear all valid bits @@ -94,7 +93,6 @@ module cache import cvw::*; #(parameter cvw_t P, logic FlushWayCntEn; logic SelWriteback; logic LRUWriteEn; - logic SelFlush; logic ResetOrFlushCntRst; logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic SelFetchBuffer; @@ -112,10 +110,10 @@ module cache import cvw::*; #(parameter cvw_t P, // and FlushAdr when handling D$ flushes // The icache must update to the newest PCNextF on flush as it is probably a trap. Trap // sets PCNextF to XTVEC and the icache must start reading the instruction. - assign AdrSelMuxSelData = {SelFlush, ((SelAdrData | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))}; + assign AdrSelMuxSelData = {FlushCache, ((SelAdrData | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))}; mux3 #(SETLEN) AdrSelMuxData(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr, AdrSelMuxSelData, CacheSetData); - assign AdrSelMuxSelTag = {SelFlush, ((SelAdrTag | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))}; + assign AdrSelMuxSelTag = {FlushCache, ((SelAdrTag | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))}; mux3 #(SETLEN) AdrSelMuxTag(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr, AdrSelMuxSelTag, CacheSetTag); @@ -123,7 +121,7 @@ module cache import cvw::*; #(parameter cvw_t P, cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, .CacheEn, .CacheSetData, .CacheSetTag, .PAdr, .LineWriteData, .LineByteMask, .SelWay, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .VictimWay, - .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .HitDirtyWay, .TagWay, .FlushStage, .InvalidateCache); + .FlushWay, .FlushCache, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .HitDirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches if(NUMWAYS > 1) begin:vict @@ -162,7 +160,7 @@ module cache import cvw::*; #(parameter cvw_t P, mux3 #(PA_BITS) CacheBusAdrMux(.d0({PAdr[PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), - .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); + .s({FlushCache, SelWriteback}), .y(CacheBusAdr)); ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path @@ -225,10 +223,10 @@ module cache import cvw::*; #(parameter cvw_t P, ///////////////////////////////////////////////////////////////////////////////////////////// cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, - .FlushStage, .CacheRW, .CacheRWNext, .Stall, + .FlushStage, .CacheRW, .Stall, .CacheHit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelWay, - .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .SelFlush, + .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CMOpM, .CacheEn, .LRUWriteEn); diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index f960cfdcd..07494c2a9 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -38,7 +38,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic CacheStall, // Cache stalls pipeline during multicycle operation // inputs from IEU input logic [1:0] CacheRW, // [1] Read, [0] Write - input logic [1:0] CacheRWNext, // [1] Read, [0] Write input logic FlushCache, // Flush all dirty lines back to memory input logic InvalidateCache, // Clear all valid bits input logic [3:0] CMOpM, // 0001: cbo.inval; 0010: cbo.flush; 0100: cbo.clean; 1000: cbo.zero @@ -63,7 +62,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic LRUWriteEn, // Update the LRU state - output logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr output logic SelWay, // Controls which way to select a way data and tag, 00 = hitway, 10 = victimway, 11 = flushway output logic FlushAdrCntEn, // Enable the counter for Flush Adr output logic FlushWayCntEn, // Enable the way counter during a flush @@ -79,7 +77,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic CMOWriteback; logic CMOZeroNoEviction; logic StallConditions; - logic StoreHazard; typedef enum logic [3:0]{STATE_READY, // hit states // miss states @@ -106,8 +103,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign CacheAccess = (|CacheRW) & ((CurrState == STATE_READY & ~Stall & ~FlushStage) | (CurrState == STATE_READ_HOLD & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW assign CacheMiss = CacheAccess & ~CacheHit; - assign StoreHazard = CacheRWNext[1] & CacheRW[0] & ~CacheRW[1]; - // special case on reset. When the fsm first exists reset the // PCNextF will no longer be pointing to the correct address. // But PCF will be the reset vector. @@ -124,7 +119,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement else if(AnyMiss | CMOWriteback) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else if(StoreHazard) NextState = STATE_READ_HOLD; else NextState = STATE_READY; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else if(CacheBusAck) NextState = STATE_READY; @@ -150,7 +144,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // com back to CPU assign CacheCommitted = (CurrState != STATE_READY) & ~(READ_ONLY_CACHE & (CurrState == STATE_READ_HOLD)); - assign StallConditions = FlushCache | AnyMiss | CMOWriteback | (StoreHazard); + assign StallConditions = FlushCache | AnyMiss | CMOWriteback; assign CacheStall = (CurrState == STATE_READY & StallConditions) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | @@ -180,12 +174,6 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2] | ~CacheBusAck)) | (CurrState == STATE_READY & AnyMiss & LineDirty); -/* -----\/----- EXCLUDED -----\/----- - assign SelFlush = (CurrState == STATE_READY & FlushCache) | - (CurrState == STATE_FLUSH) | - (CurrState == STATE_FLUSH_WRITEBACK); - -----/\----- EXCLUDED -----/\----- */ - assign SelFlush = FlushCache; // coverage off -item e 1 -fecexprrow 1 // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 52ccc6c15..96762dbde 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -43,7 +43,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, input logic SetDirty, // Set the dirty bit in the selected way and set input logic SelWay, // Controls which way to select a way data and tag, 00 = hitway, 10 = victimway, 11 = flushway input logic ClearDirty, // Clear the dirty bit in the selected way and set - input logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr + input logic FlushCache, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr input logic VictimWay, // LRU selected this way as victim to evict input logic FlushWay, // This way is selected for flush and possible writeback if dirty input logic InvalidateCache,// Clear all valid bits @@ -80,12 +80,12 @@ module cacheway import cvw::*; #(parameter cvw_t P, if (!READ_ONLY_CACHE) begin:flushlogic logic FlushWayEn; - mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelDirty); + mux2 #(1) seltagmux(VictimWay, FlushWay, FlushCache, SelDirty); // FlushWay is part of a one hot way selection. Must clear it if FlushWay not selected. // coverage off -item e 1 -fecexprrow 3 - // nonzero ways will never see SelFlush=0 while FlushWay=1 since FlushWay only advances on a subset of SelFlush assertion cases. - assign FlushWayEn = FlushWay & SelFlush; + // nonzero ways will never see FlushCache=0 while FlushWay=1 since FlushWay only advances on a subset of FlushCache assertion cases. + assign FlushWayEn = FlushWay & FlushCache; assign SelNonHit = FlushWayEn | SelWay; end else begin:flushlogic // no flush operation for read-only caches. assign SelDirty = VictimWay; diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 78b0d15e8..5f2dff313 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -65,6 +65,7 @@ module ahbcacheinterface #( input logic [PA_BITS-1:0] PAdr, // Physical address of uncached memory operation input logic [LLEN-1:0] WriteDataM, // IEU write data for uncached store input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write + input logic BusAtomic, // Uncache atomic memory operation input logic [2:0] Funct3, // Size of uncached memory operation input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache @@ -121,7 +122,7 @@ module ahbcacheinterface #( flopen #(AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[AHBW/8-1:0], HWSTRB); buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE) AHBBuscachefsm( - .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, + .HCLK, .HRESETn, .Flush, .BusRW, .BusAtomic, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, .CacheBusRW, .BusCMOZero, .CacheBusAck, .BeatCount, .BeatCountDelayed, .HREADY, .HTRANS, .HWRITE, .HBURST); endmodule diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 45f66762f..0368164ed 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -42,6 +42,7 @@ module buscachefsm #( input logic Stall, // Core pipeline is stalled input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write + input logic BusAtomic, // Uncache atomic memory operation input logic BusCMOZero, // Uncached cbo.zero must write zero to full sized cacheline without going through the cache output logic BusStall, // Bus is busy with an in flight memory operation output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt @@ -65,7 +66,7 @@ module buscachefsm #( output logic [2:0] HBURST // AHB burst length ); - typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; + typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, ATOMIC_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; typedef enum logic [1:0] {AHB_IDLE = 2'b00, AHB_BUSY = 2'b01, AHB_NONSEQ = 2'b10, AHB_SEQ = 2'b11} ahbtranstype; busstatetype CurrState, NextState; @@ -87,21 +88,25 @@ module buscachefsm #( always_comb begin case(CurrState) ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; - else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; + else if (HREADY & BusWrite) NextState = CACHE_WRITEBACK; else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY) NextState = MEM3; - else NextState = DATA_PHASE; - MEM3: if(Stall) NextState = MEM3; - else NextState = ADR_PHASE; - CACHE_FETCH: if(HREADY & FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; - else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; - else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; - else NextState = CACHE_FETCH; - CACHE_WRITEBACK: if(HREADY & FinalBeatCount & BusWrite) NextState = CACHE_WRITEBACK; - else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; - else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; - else NextState = CACHE_WRITEBACK; + DATA_PHASE: if(HREADY & BusAtomic) NextState = ATOMIC_PHASE; + else if(HREADY & ~BusAtomic) NextState = MEM3; + else NextState = DATA_PHASE; + ATOMIC_PHASE: if(HREADY) NextState = MEM3; + else NextState = ATOMIC_PHASE; + MEM3: if(Stall) NextState = MEM3; + else NextState = ADR_PHASE; + CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; + else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; + else NextState = CACHE_FETCH; + CACHE_WRITEBACK: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; + else if(HREADY & FinalBeatCount & BusCMOZero) NextState = MEM3; + else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; + else NextState = CACHE_WRITEBACK; default: NextState = ADR_PHASE; endcase end @@ -120,19 +125,23 @@ module buscachefsm #( assign CaptureEn = (CurrState == DATA_PHASE & BusRW[1] & ~Flush) | (CurrState == CACHE_FETCH & HREADY); assign CacheAccess = CurrState == CACHE_FETCH | CurrState == CACHE_WRITEBACK; - assign BusStall = (CurrState == ADR_PHASE & ((|BusRW) | (|CacheBusRW))) | + assign BusStall = (CurrState == ADR_PHASE & ((|BusRW) | (|CacheBusRW) | BusCMOZero)) | //(CurrState == DATA_PHASE & ~BusRW[0]) | // *** replace the next line with this. Fails uart test but i think it's a test problem not a hardware problem. (CurrState == DATA_PHASE) | - (CurrState == CACHE_FETCH & ~HREADY) | - (CurrState == CACHE_WRITEBACK & ~HREADY); + (CurrState == ATOMIC_PHASE) | + (CurrState == CACHE_FETCH & ~FinalBeatCount) | + (CurrState == CACHE_WRITEBACK & ~FinalBeatCount); + assign BusCommitted = (CurrState != ADR_PHASE) & ~(READ_ONLY_CACHE & CurrState == MEM3); // AHB bus interface - assign HTRANS = (CurrState == ADR_PHASE & HREADY & ((|BusRW) | (|CacheBusRW)) & ~Flush) | + assign HTRANS = (CurrState == ADR_PHASE & HREADY & ((|BusRW) | (|CacheBusRW) | BusCMOZero) & ~Flush) | + (CurrState == DATA_PHASE & BusAtomic) | (CacheAccess & FinalBeatCount & |CacheBusRW & HREADY & ~Flush) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |BeatCount) ? (`BURST_EN ? AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; - assign HWRITE = (BusRW[0] | BusWrite & ~Flush) | (CurrState == CACHE_WRITEBACK & |BeatCount); + assign HWRITE = ((BusRW[0] & ~BusAtomic) | BusWrite & ~Flush) | (CurrState == DATA_PHASE & BusAtomic) | + (CurrState == CACHE_WRITEBACK & |BeatCount); assign HBURST = `BURST_EN & ((|CacheBusRW & ~Flush) | (CacheAccess & |BeatCount)) ? LocalBurstType : 3'b0; always_comb begin @@ -149,6 +158,7 @@ module buscachefsm #( assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount & ~BusCMOZero); assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | BusWrite)) | (CurrState == DATA_PHASE & BusRW[0]) | + (CurrState == ATOMIC_PHASE & BusRW[0]) | (CurrState == CACHE_WRITEBACK) | (CurrState == CACHE_FETCH); diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 1285ab4cc..ab4ba414a 100644 --- a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -152,9 +152,11 @@ module controller import cvw::*; #(parameter cvw_t P) ( logic [3:0] CMOpD, CMOpE; // which CMO instruction 1: cbo.inval; 2: cbo.flush; 4: cbo.clean; 8: cbo.zero logic IFUPrefetchD; // instruction prefetch logic LSUPrefetchD, LSUPrefetchE; // data prefetch - logic AMOStallD, CMOStallD; // Structural hazards from atomic and cache management ops + logic CMOStallD; // Structural hazards from cache management ops logic MatchDE; // Match between a source register in Decode stage and destination register in Execute stage logic FCvtIntStallD, MDUStallD, CSRRdStallD; // Stall due to conversion, load, multiply/divide, CSR read + logic StoreStallD; // load after store hazard + // Extract fields assign OpD = InstrD[6:0]; @@ -452,20 +454,12 @@ module controller import cvw::*; #(parameter cvw_t P) ( end // Stall on dependent operations that finish in Mem Stage and can't bypass in time - assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction - assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt - assign LoadStallD = (MemReadE|SCE) & MatchDE; - assign MDUStallD = MDUE & MatchDE; // Int mult/div is at least two cycle latency, even when coming from the FDIV - assign CSRRdStallD = CSRReadE & MatchDE; - - // the synchronous DTIM cannot read immediately after write - // a cache cannot read or write immediately after a write - // atomic operations are also detected as MemRWD[1] ***check; seems like & MemRWE - // *** RT: Remove this after updating the cache. - // *** RT: Check that atomic after atomic works correctly. - assign AMOStallD = &MemRWE & MemRWD[1]; // Read after atomic operation causes structural hazard - assign CMOStallD = (|CMOpE) & (|CMOpD); // CMO op after CMO op causes structural hazard ***explain, why doesn't interact with read/write - // Structural hazard causes stall if any of these events occur - assign StructuralStallD = LoadStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | AMOStallD | CMOStallD; + assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction + assign LoadStallD = (MemReadE|SCE) & MatchDE; + assign StoreStallD = MemRWD[1] & MemRWE[0]; // Store or AMO followed by load or AMO + assign CSRRdStallD = CSRReadE & MatchDE; + assign MDUStallD = MDUE & MatchDE; // Int mult/div is at least two cycle latency, even when coming from the FDIV + assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt + assign StructuralStallD = LoadStallD | StoreStallD | CSRRdStallD | MDUStallD | FCvtIntStallD; endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 78568f55f..b2a4abfcd 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -223,6 +223,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( // **** must fix words per line vs beats per line as in lsu. localparam WORDSPERLINE = P.ICACHE_SUPPORTED ? P.ICACHE_LINELENINBITS/P.XLEN : 1; localparam LOGBWPL = P.ICACHE_SUPPORTED ? $clog2(WORDSPERLINE) : 1; + if(P.ICACHE_SUPPORTED) begin : icache localparam LINELEN = P.ICACHE_SUPPORTED ? P.ICACHE_LINELENINBITS : P.XLEN; localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) @@ -246,7 +247,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), .ByteMask('0), .BeatCount('0), .SelBusBeat('0), .CacheWriteData('0), - .CacheRW(CacheRWF), .CacheRWNext('0), // CacheRWNext is only used to detect hazards. Not possible with icache + .CacheRW(CacheRWF), .FlushCache('0), .NextSet(PCSpillNextF[11:0]), .PAdr(PCPF), @@ -257,13 +258,18 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .HRDATA, .Flush(FlushD), .CacheBusRW, .BusCMOZero(1'b0), .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), .Funct3(3'b010), .HADDR(IFUHADDR), .HREADY(IFUHREADY), .HWRITE(IFUHWRITE), .CacheBusAdr(ICacheBusAdr), - .BeatCount(), .Cacheable(CacheableF), .SelBusBeat(), .WriteDataM('0), + .BeatCount(), .Cacheable(CacheableF), .SelBusBeat(), .WriteDataM('0), .BusAtomic('0), .CacheBusAck(ICacheBusAck), .HWDATA(), .CacheableOrFlushCacheM(1'b0), .CacheReadDataWordM('0), .FetchBuffer, .PAdr(PCPF), .BusRW, .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF)); - mux3 #(32) UnCachedDataMux(.d0(ICacheInstrF), .d1(FetchBuffer[32-1:0]), .d2(IROMInstrF), + logic [31:0] ShiftUncachedInstr; + + if(P.XLEN == 64) mux4 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], FetchBuffer[48-1:16], FetchBuffer[64-1:32], {16'b0, FetchBuffer[64-1:48]}, + PCSpillF[2:1], ShiftUncachedInstr); + else mux2 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], {16'b0, FetchBuffer[32-1:16]}, PCSpillF[1], ShiftUncachedInstr); + mux3 #(32) UnCachedDataMux(.d0(ICacheInstrF), .d1(ShiftUncachedInstr), .d2(IROMInstrF), .s({SelIROM, ~CacheableF}), .y(InstrRawF[31:0])); end else begin : passthrough assign IFUHADDR = PCPF; diff --git a/src/lsu/dtim.sv b/src/lsu/dtim.sv index f896b506b..a44086a15 100644 --- a/src/lsu/dtim.sv +++ b/src/lsu/dtim.sv @@ -28,17 +28,15 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module dtim import cvw::*; #(parameter cvw_t P) ( - input logic clk, reset, - input logic FlushW, - input logic ce, // Chip Enable. 0: Holds ReadDataWordM - input logic [1:0] MemRWM, // Read/Write control - input logic [1:0] MemRWE, // Read/Write control - input logic [P.PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address - input logic [P.LLEN-1:0] WriteDataM, // Write data from IEU - input logic [P.LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write - output logic [P.LLEN-1:0] ReadDataWordM, // Read data before subword selection - output logic DTIMStall, - output logic DTIMSelWrite + input logic clk, reset, + input logic FlushW, + input logic ce, // Chip Enable. 0: Holds ReadDataWordM + input logic [1:0] MemRWM, // Read/Write control + input logic [1:0] MemRWE, // Read/Write control + input logic [P.PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address + input logic [P.LLEN-1:0] WriteDataM, // Write data from IEU + input logic [P.LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write + output logic [P.LLEN-1:0] ReadDataWordM // Read data before subword selection ); logic we; @@ -50,16 +48,8 @@ module dtim import cvw::*; #(parameter cvw_t P) ( localparam ADDR_WDITH = $clog2(DEPTH); localparam OFFSET = $clog2(LLENBYTES); - logic DTIMStallHazard, DTIMStallHazardD; - - assign DTIMStallHazard = MemRWM[0] & MemRWE[1]; - flopr #(1) DTIMStallReg(clk, reset, DTIMStallHazard, DTIMStallHazardD); - assign DTIMStall = DTIMStallHazard & ~DTIMStallHazardD; - - assign DTIMSelWrite = MemRWM[0] & ~(DTIMStallHazard & ~DTIMStall); - - assign we = DTIMSelWrite & ~FlushW; // have to ignore write if Trap. + assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap. ram1p1rwbe #(.USE_SRAM(P.USE_SRAM), .DEPTH(DEPTH), .WIDTH(P.LLEN)) - ram(.clk, .ce(ce | DTIMSelWrite), .we, .bwe(ByteMaskM), .addr(DTIMAdr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); + ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(DTIMAdr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index da9ce0305..6417bc573 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -150,7 +150,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic IgnoreRequest; // On FlushM or TLB miss ignore memory operation logic SelDTIM; // Select DTIM rather than bus or D$ logic [P.XLEN-1:0] WriteDataZM; - logic DTIMStall; ///////////////////////////////////////////////////////////////////////////////////////////// // Pipeline for IEUAdr E to M @@ -220,7 +219,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // the trap module. assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall | DTIMStall; + assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; assign LSUStallM = CacheBusHPWTStall | SpillStallM; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -269,10 +268,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if (P.DTIM_SUPPORTED) begin : dtim logic [P.PA_BITS-1:0] DTIMAdr; logic [1:0] DTIMMemRWM; - logic DTIMSelWrite; // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. - mux2 #(P.PA_BITS) DTIMAdrMux(IEUAdrExtE[P.PA_BITS-1:0], IEUAdrExtM[P.PA_BITS-1:0], DTIMSelWrite, DTIMAdr); + mux2 #(P.PA_BITS) DTIMAdrMux(IEUAdrExtE[P.PA_BITS-1:0], IEUAdrExtM[P.PA_BITS-1:0], MemRWM[0], DTIMAdr); assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. @@ -280,9 +278,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( dtim #(P) dtim(.clk, .reset, .ce(~GatedStallW), .MemRWE(MemRWE), // *** update when you update the cache RWE .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), - .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM), .DTIMStall, .DTIMSelWrite); - end else begin - assign DTIMStall = '0; + .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM)); end if (P.BUS_SUPPORTED) begin : bus if(P.DCACHE_SUPPORTED) begin : dcache @@ -307,11 +303,17 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic CacheStall; logic [1:0] CacheBusRWTemp; logic BusCMOZero; + logic [3:0] CacheCMOpM; + logic BusAtomic; if(P.ZICBOZ_SUPPORTED) begin assign BusCMOZero = CMOpM[3] & ~CacheableM; + assign CacheCMOpM = CacheableM ? CMOpM : '0; + assign BusAtomic = AtomicM[1] & ~CacheableM; end else begin assign BusCMOZero = '0; + assign CacheCMOpM = '0; + assign BusAtomic = '0; end assign BusRW = ~CacheableM & ~SelDTIM ? LSURWM : '0; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; @@ -320,7 +322,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( - .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRWNext(MemRWE), // *** change to LSURWE after updating hptw and atomic + .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(SelStoreDelay ? 2'b00 : CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), @@ -329,8 +331,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), .FetchBuffer, .CacheBusRW(CacheBusRWTemp), - .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0), .CMOpM(CMOpM)); - + .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0), .CMOpM(CacheCMOpM)); + assign DCacheStallM = CacheStall & ~IgnoreRequestTLB; assign CacheBusRW = CacheBusRWTemp; @@ -340,7 +342,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), - .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusCMOZero, .CacheableOrFlushCacheM, + .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusAtomic, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM));