From be8e0eee1bd8ee9310c2599bb98616021609deba Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Nov 2022 15:03:43 -0600 Subject: [PATCH] Removed TrapM from the LSU and IFU. TrapM is replaced with FlushW for both. (Don't like this for the IFU). FlushW prevents writing the cache, dtim, and bus state. FlushW still gates HTRANS. FlushW does not impact the Mealy outputs of the cache and bus FSMs and hazard is updated to not stall W if we get a trap. --- pipelined/src/cache/cache.sv | 7 ++-- pipelined/src/cache/cachefsm.sv | 42 ++++++++++------------- pipelined/src/cache/cacheway.sv | 13 +++---- pipelined/src/ebu/ahbcacheinterface.sv | 3 +- pipelined/src/ebu/ahbinterface.sv | 3 +- pipelined/src/ebu/buscachefsm.sv | 5 +-- pipelined/src/ebu/busfsm.sv | 7 ++-- pipelined/src/hazard/hazard.sv | 2 +- pipelined/src/ifu/ifu.sv | 18 +++++----- pipelined/src/ifu/spillsupport.sv | 8 ++--- pipelined/src/lsu/dtim.sv | 16 ++++----- pipelined/src/lsu/lsu.sv | 25 +++++++------- pipelined/src/lsu/lsuvirtmen.sv | 10 +++--- pipelined/src/mmu/hptw.sv | 14 +++++++- pipelined/src/wally/wallypipelinedcore.sv | 4 +-- 15 files changed, 96 insertions(+), 81 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 16d2d2da6..dfd64684b 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -34,6 +34,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE input logic clk, input logic reset, // cpu side + input logic Flush, input logic CPUBusy, input logic [1:0] CacheRW, input logic [1:0] CacheAtomic, @@ -125,11 +126,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, - .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, 
.VictimTagWay, + .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Flush, .Invalidate(InvalidateCache)); if(NUMWAYS > 1) begin:vict cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( - .clk, .reset, .ce(SRAMEnable), .HitWay, .VictimWay, .RAdr, .LRUWriteEn); + .clk, .reset, .ce(SRAMEnable), .HitWay, .VictimWay, .RAdr, .LRUWriteEn(LRUWriteEn & ~Flush)); end else assign VictimWay = 1'b1; // one hot. assign CacheHit = | HitWay; assign VictimDirty = | VictimDirtyWay; @@ -206,7 +207,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Cache FSM ///////////////////////////////////////////////////////////////////////////////////////////// cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, - .CacheRW, .CacheAtomic, .CPUBusy, + .Flush, .CacheRW, .CacheAtomic, .CPUBusy, .CacheHit, .VictimDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .ClearValid, .ClearDirty, .SetDirty, diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index ff5183b7f..edbf6f995 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -34,6 +34,7 @@ module cachefsm (input logic clk, input logic reset, // inputs from IEU + input logic Flush, input logic [1:0] CacheRW, input logic [1:0] CacheAtomic, input logic FlushCache, @@ -75,9 +76,8 @@ module cachefsm logic resetDelay; logic AMO; - logic DoAMO, DoRead, DoWrite, DoFlush; - logic DoAnyUpdateHit, DoAnyHit; - logic DoAnyMiss; + logic AnyUpdateHit, AnyHit; + logic AnyMiss; logic FlushFlag, FlushWayAndNotAdrFlag; typedef enum logic [3:0] {STATE_READY, // hit states @@ -94,19 +94,15 @@ module cachefsm (* mark_debug = "true" *) statetype CurrState, NextState; - assign DoFlush = FlushCache; assign AMO = CacheAtomic[1] & (&CacheRW); - assign DoAMO = AMO; - assign DoRead = CacheRW[1]; - assign DoWrite = CacheRW[0]; - assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit & 
~InvalidateCache; - assign DoAnyUpdateHit = (DoAMO | DoWrite) & CacheHit; - assign DoAnyHit = DoAnyUpdateHit | (DoRead & CacheHit); + assign AnyMiss = (AMO | CacheRW[1] | CacheRW[0]) & ~CacheHit & ~InvalidateCache; + assign AnyUpdateHit = (AMO | CacheRW[0]) & CacheHit; + assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = (DoAMO | DoRead | DoWrite) & CurrState == STATE_READY; + assign CacheAccess = (AMO | CacheRW[1] | CacheRW[0]) & CurrState == STATE_READY; assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. When the fsm first exists reset the @@ -115,18 +111,18 @@ module cachefsm flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay)); always_ff @(posedge clk) - if (reset) CurrState <= #1 STATE_READY; + if (reset | Flush) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; always_comb begin NextState = STATE_READY; case (CurrState) STATE_READY: if(InvalidateCache) NextState = STATE_READY; - else if(DoFlush) NextState = STATE_FLUSH; + else if(FlushCache) NextState = STATE_FLUSH; // Delayed LRU update. Cannot check if victim line is dirty on this cycle. // To optimize do the fetch first, then eviction if necessary. 
- else if(DoAnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; - else if(DoAnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; + else if(AnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; + else if(AnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; else NextState = STATE_READY; STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_FETCH_WDV; @@ -155,7 +151,7 @@ module cachefsm // com back to CPU assign CacheCommitted = CurrState != STATE_READY; - assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAnyMiss)) | + assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. @@ -165,16 +161,16 @@ module cachefsm (CurrState == STATE_FLUSH_WRITE_BACK & ~(FlushFlag) & CacheBusAck); // write enables internal to cache assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; - assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) | + assign SetDirty = (CurrState == STATE_READY & AnyUpdateHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0])); assign ClearValid = '0; assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | (CurrState == STATE_FLUSH_WRITE_BACK & CacheBusAck); - assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | + assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | - (CurrState == STATE_READY & DoAnyMiss & VictimDirty); + (CurrState == STATE_READY & AnyMiss & VictimDirty); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | 
(CurrState == STATE_FLUSH_WRITE_BACK); assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag; @@ -185,11 +181,11 @@ module cachefsm assign FlushAdrCntRst = (CurrState == STATE_READY); assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls - assign CacheBusRW[1] = (CurrState == STATE_READY & DoAnyMiss & ~VictimDirty) | + assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~VictimDirty) | (CurrState == STATE_MISS_FETCH_WDV & ~CacheBusAck) | (CurrState == STATE_MISS_EVICT_DIRTY & CacheBusAck); -// assign CacheBusRW[1] = CurrState == STATE_READY & DoAnyMiss; - assign CacheBusRW[0] = (CurrState == STATE_READY & DoAnyMiss & VictimDirty) | +// assign CacheBusRW[1] = CurrState == STATE_READY & AnyMiss; + assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & VictimDirty) | (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITE_BACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); @@ -197,7 +193,7 @@ module cachefsm // (CurrState == STATE_FLUSH_CHECK & VictimDirty); // **** can this be simplified? 
assign SelAdr = (CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed - (CurrState == STATE_READY & (DoAnyMiss)) | + (CurrState == STATE_READY & (AnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 422ae98da..4776aeaf1 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -48,6 +48,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic VictimWay, input logic FlushWay, input logic Invalidate, + input logic Flush, // input logic [(`XLEN-1)/8:0] ByteMask, input logic [LINELEN/8-1:0] LineByteMask, @@ -86,7 +87,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce, .addr(RAdr), .dout(ReadTag), .bwe('1), - .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidWay)); + .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidWay & ~Flush)); // AND portion of distributed tag multiplexer mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag); @@ -109,7 +110,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .addr(RAdr), .dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), .din(CacheWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), - .we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); + .we(SelectedWriteWordEn & ~Flush), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); end // AND portion of distributed read multiplexers @@ -123,8 +124,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset | Invalidate) ValidBits <= #1 '0; if(ce) 
begin Valid <= #1 ValidBits[RAdr]; - if (SetValidWay) ValidBits[RAdr] <= #1 1'b1; - else if (ClearValidWay) ValidBits[RAdr] <= #1 1'b0; + if (SetValidWay & ~Flush) ValidBits[RAdr] <= #1 1'b1; + else if (ClearValidWay & ~Flush) ValidBits[RAdr] <= #1 1'b0; end end @@ -138,8 +139,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; if(ce) begin Dirty <= #1 DirtyBits[RAdr]; - if (SetDirtyWay) DirtyBits[RAdr] <= #1 1'b1; - else if (ClearDirtyWay) DirtyBits[RAdr] <= #1 1'b0; + if (SetDirtyWay & ~Flush) DirtyBits[RAdr] <= #1 1'b1; + else if (ClearDirtyWay & ~Flush) DirtyBits[RAdr] <= #1 1'b0; end end end else assign Dirty = 1'b0; diff --git a/pipelined/src/ebu/ahbcacheinterface.sv b/pipelined/src/ebu/ahbcacheinterface.sv index 5652cd023..4600edcd5 100644 --- a/pipelined/src/ebu/ahbcacheinterface.sv +++ b/pipelined/src/ebu/ahbcacheinterface.sv @@ -56,6 +56,7 @@ module ahbcacheinterface #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE input logic Cacheable, // lsu/ifu interface + input logic Flush, input logic [`PA_BITS-1:0] PAdr, input logic [1:0] BusRW, input logic CPUBusy, @@ -83,7 +84,7 @@ module ahbcacheinterface #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE mux2 #(3) sizemux(.d0(Funct3), .d1(`XLEN == 32 ? 
3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); buscachefsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) AHBBuscachefsm( - .HCLK, .HRESETn, .BusRW, .CPUBusy, .BusCommitted, .BusStall, .CaptureEn, .SelBusWord, + .HCLK, .HRESETn, .Flush, .BusRW, .CPUBusy, .BusCommitted, .BusStall, .CaptureEn, .SelBusWord, .CacheBusRW, .CacheBusAck, .WordCount, .WordCountDelayed, .HREADY, .HTRANS, .HWRITE, .HBURST); endmodule diff --git a/pipelined/src/ebu/ahbinterface.sv b/pipelined/src/ebu/ahbinterface.sv index be2fbba53..9955cca5a 100644 --- a/pipelined/src/ebu/ahbinterface.sv +++ b/pipelined/src/ebu/ahbinterface.sv @@ -47,6 +47,7 @@ module ahbinterface #(parameter LSU = 0) // **** modify to use LSU/ifu parameter output logic [`XLEN/8-1:0] HWSTRB, // lsu/ifu interface + input logic Flush, input logic [1:0] BusRW, input logic [`XLEN/8-1:0] ByteMask, input logic [`XLEN-1:0] WriteData, @@ -71,7 +72,7 @@ module ahbinterface #(parameter LSU = 0) // **** modify to use LSU/ifu parameter assign HWSTRB = '0; end - busfsm busfsm(.HCLK, .HRESETn, .BusRW, + busfsm busfsm(.HCLK, .HRESETn, .Flush, .BusRW, .BusCommitted, .CPUBusy, .BusStall, .CaptureEn, .HREADY, .HTRANS, .HWRITE); endmodule diff --git a/pipelined/src/ebu/buscachefsm.sv b/pipelined/src/ebu/buscachefsm.sv index da8c0e259..6965c6737 100644 --- a/pipelined/src/ebu/buscachefsm.sv +++ b/pipelined/src/ebu/buscachefsm.sv @@ -38,6 +38,7 @@ module buscachefsm #(parameter integer WordCountThreshold, input logic HRESETn, // IEU interface + input logic Flush, input logic [1:0] BusRW, input logic CPUBusy, output logic BusCommitted, @@ -77,7 +78,7 @@ module buscachefsm #(parameter integer WordCountThreshold, logic CacheAccess; always_ff @(posedge HCLK) - if (~HRESETn) CurrState <= #1 ADR_PHASE; + if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; else CurrState <= #1 NextState; always_comb begin @@ -135,7 +136,7 @@ module buscachefsm #(parameter integer WordCountThreshold, assign BusCommitted = CurrState != ADR_PHASE; // AHB bus interface - assign 
HTRANS = (CurrState == ADR_PHASE & HREADY & (|BusRW | |CacheBusRW)) | + assign HTRANS = (CurrState == ADR_PHASE & HREADY & (|BusRW | |CacheBusRW) & ~Flush) | (CacheAccess & FinalWordCount & |CacheBusRW & HREADY) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |WordCount) ? (`BURST_EN ? AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; diff --git a/pipelined/src/ebu/busfsm.sv b/pipelined/src/ebu/busfsm.sv index 336c3b4e2..8203fa74e 100644 --- a/pipelined/src/ebu/busfsm.sv +++ b/pipelined/src/ebu/busfsm.sv @@ -36,6 +36,7 @@ module busfsm input logic HRESETn, // IEU interface + input logic Flush, input logic [1:0] BusRW, input logic CPUBusy, output logic BusCommitted, @@ -55,8 +56,8 @@ module busfsm (* mark_debug = "true" *) busstatetype CurrState, NextState; always_ff @(posedge HCLK) - if (~HRESETn) CurrState <= #1 ADR_PHASE; - else CurrState <= #1 NextState; + if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; + else CurrState <= #1 NextState; always_comb begin case(CurrState) @@ -76,7 +77,7 @@ module busfsm assign BusCommitted = CurrState != ADR_PHASE; - assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW) ? AHB_NONSEQ : AHB_IDLE; + assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) ? AHB_NONSEQ : AHB_IDLE; assign HWRITE = BusRW[0]; assign CaptureEn = CurrState == DATA_PHASE; diff --git a/pipelined/src/hazard/hazard.sv b/pipelined/src/hazard/hazard.sv index 0ca3c5bc4..39700ed0c 100644 --- a/pipelined/src/hazard/hazard.sv +++ b/pipelined/src/hazard/hazard.sv @@ -70,7 +70,7 @@ module hazard( // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap // assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE; assign StallMCause = ((wfiM) & (~TrapM & ~IntPendingM)); //*** Ross: should FDivBusyE trigger StallECause rather than StallMCause similar to DivBusyE? 
- assign StallWCause = LSUStallM | IFUStallF | (FDivBusyE & ~TrapM & ~IntPendingM); + assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM) | (FDivBusyE & ~TrapM & ~IntPendingM); assign #1 StallF = StallFCause | StallD; assign #1 StallD = StallDCause | StallE; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 8f632acb9..93804877d 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -34,7 +34,7 @@ module ifu ( input logic clk, reset, input logic StallF, StallD, StallE, StallM, - input logic FlushF, FlushD, FlushE, FlushM, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Bus interface (* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, (* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUHADDR, @@ -130,7 +130,7 @@ module ifu ( if(`C_SUPPORTED) begin : SpillSupport - spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF(InstrRawF), + spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .Flush(TrapM), .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF(InstrRawF), .InstrDAPageFaultF, .IFUCacheBusStallF, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpillSupport @@ -194,7 +194,7 @@ module ifu ( assign CommittedF = CacheCommittedF | BusCommittedF; logic IgnoreRequest; - assign IgnoreRequest = ITLBMissF | TrapM; + assign IgnoreRequest = ITLBMissF | FlushD; // The IROM uses untranslated addresses, so it is not compatible with virtual memory. if (`IROM_SUPPORTED) begin : irom @@ -215,12 +215,12 @@ module ifu ( logic [1:0] CacheBusRW, BusRW, CacheRWF; //assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableF, CacheableF} & ~{SelIROM, SelIROM}; - assign BusRW = ~IgnoreRequest & ~CacheableF & ~SelIROM ? IFURWF : '0; - assign CacheRWF = ~IgnoreRequest & CacheableF & ~SelIROM ? IFURWF : '0; + assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? 
IFURWF : '0; + assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0; cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) - icache(.clk, .reset, .CPUBusy, + icache(.clk, .reset, .Flush(FlushW), .CPUBusy, .FetchBuffer, .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheBusRW, @@ -237,7 +237,7 @@ module ifu ( ahbcacheinterface #(WORDSPERLINE, LINELEN, LOGBWPL, `ICACHE) ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), .HRDATA, - .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), + .Flush(FlushW), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .Funct3(3'b010), .HADDR(IFUHADDR), .HREADY(IFUHREADY), .HWRITE(IFUHWRITE), .CacheBusAdr(ICacheBusAdr), .WordCount(), .Cacheable(CacheableF), .SelBusWord(), .CacheBusAck(ICacheBusAck), @@ -252,11 +252,11 @@ module ifu ( logic CaptureEn; logic [31:0] FetchBuffer; logic [1:0] BusRW; - assign BusRW = ~IgnoreRequest & ~SelIROM ? IFURWF : '0; + assign BusRW = ~ITLBMissF & ~SelIROM ? 
IFURWF : '0; // assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{SelIROM, SelIROM}; assign IFUHSIZE = 3'b010; - ahbinterface #(0) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(IFUHREADY), + ahbinterface #(0) ahbinterface(.HCLK(clk), .Flush(FlushW), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), .CPUBusy, .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); diff --git a/pipelined/src/ifu/spillsupport.sv b/pipelined/src/ifu/spillsupport.sv index 56bd3a78c..b247c2d32 100644 --- a/pipelined/src/ifu/spillsupport.sv +++ b/pipelined/src/ifu/spillsupport.sv @@ -35,7 +35,7 @@ module spillsupport #(parameter CACHE_ENABLED) (input logic clk, input logic reset, - input logic StallF, + input logic StallF, Flush, input logic [`XLEN-1:0] PCF, input logic [`XLEN-3:0] PCPlusUpperF, input logic [`XLEN-1:0] PCNextF, @@ -61,7 +61,7 @@ module spillsupport #(parameter CACHE_ENABLED) mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlusUpperF, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); - mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF), + mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~Flush), .y(PCNextFSpill)); mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill)); @@ -69,7 +69,7 @@ module spillsupport #(parameter CACHE_ENABLED) assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF)); always_ff @(posedge clk) - if (reset) CurrState <= #1 STATE_READY; + if (reset | Flush) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; always_comb begin @@ -89,7 +89,7 @@ module spillsupport #(parameter CACHE_ENABLED) assign SavedInstr = CACHE_ENABLED ? 
InstrRawF[15:0] : InstrRawF[31:16]; flopenr #(16) SpillInstrReg(.clk(clk), - .en(SpillSaveF), + .en(SpillSaveF & ~Flush), .reset(reset), .d(SavedInstr), .q(SpillDataLine0)); diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv index 45ada1c6c..8f9918835 100644 --- a/pipelined/src/lsu/dtim.sv +++ b/pipelined/src/lsu/dtim.sv @@ -30,13 +30,13 @@ `include "wally-config.vh" module dtim( - input logic clk, reset, ce, - input logic [1:0] MemRWM, - input logic [`PA_BITS-1:0] Adr, - input logic TrapM, - input logic [`LLEN-1:0] WriteDataM, - input logic [`LLEN/8-1:0] ByteMaskM, - output logic [`LLEN-1:0] ReadDataWordM + input logic clk, reset, ce, + input logic [1:0] MemRWM, + input logic [`PA_BITS-1:0] Adr, + input logic FlushW, + input logic [`LLEN-1:0] WriteDataM, + input logic [`LLEN/8-1:0] ByteMaskM, + output logic [`LLEN-1:0] ReadDataWordM ); logic we; @@ -44,7 +44,7 @@ module dtim( localparam ADDR_WDITH = $clog2(`DTIM_RANGE/8); localparam OFFSET = $clog2(`LLEN/8); - assign we = MemRWM[0] & ~TrapM; // have to ignore write if Trap. + assign we = MemRWM[0] & ~FlushW; // have to ignore write if FlushW (which replaces TrapM here). 
sram1p1rw #(.DEPTH(`DTIM_RANGE/8), .WIDTH(`LLEN)) ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index d469e6721..5be7c9311 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -47,7 +47,6 @@ module lsu ( input logic [2:0] Funct3M, input logic [6:0] Funct7M, input logic [1:0] AtomicM, - input logic TrapM, input logic FlushDCacheM, output logic CommittedM, output logic SquashSCW, @@ -131,7 +130,7 @@ module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED lsuvirtmem lsuvirtmem(.clk, .reset, .StallW, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, - .TrapM, .DCacheStallM, .SATP_REGW, .PCF, + .FlushW, .DCacheStallM, .SATP_REGW, .PCF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, .IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, @@ -203,7 +202,7 @@ module lsu ( logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; logic [`LLEN-1:0] ReadDataWordMuxM, DTIMReadDataWordM, DCacheReadDataWordM; logic IgnoreRequest; - assign IgnoreRequest = IgnoreRequestTLB | TrapM; + assign IgnoreRequest = IgnoreRequestTLB | FlushW; if (`DTIM_SUPPORTED) begin : dtim logic [`PA_BITS-1:0] DTIMAdr; @@ -211,12 +210,12 @@ module lsu ( // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. assign DTIMAdr = MemRWM[0] ? IEUAdrExtM[`PA_BITS-1:0] : IEUAdrExtE[`PA_BITS-1:0]; // zero extend or contract to PA_BITS - assign DTIMMemRWM = SelDTIM & ~IgnoreRequest ? LSURWM : '0; + assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. 
dtim dtim(.clk, .reset, .ce(~CPUBusy), .MemRWM(DTIMMemRWM), .Adr(DTIMAdr), - .TrapM, .WriteDataM(LSUWriteDataM), + .FlushW, .WriteDataM(LSUWriteDataM), .ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); end else begin end @@ -242,15 +241,15 @@ module lsu ( logic [1:0] CacheRWM, CacheAtomicM; logic CacheFlushM; - assign BusRW = ~CacheableM & ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; + assign BusRW = ~CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; - assign CacheRWM = CacheableM & ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; - assign CacheAtomicM = CacheableM & ~IgnoreRequest & ~SelDTIM ? LSUAtomicM : '0; - assign CacheFlushM = ~TrapM & FlushDCacheM; + assign CacheRWM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; + assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSUAtomicM : '0; + assign CacheFlushM = FlushDCacheM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .DCACHE(1)) dcache( - .clk, .reset, .CPUBusy, .SelBusWord, .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), + .clk, .reset, .CPUBusy, .SelBusWord, .Flush(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(CacheFlushM), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .WordCount(WordCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .FinalWriteData(LSUWriteDataM), .SelHPTW, @@ -260,7 +259,7 @@ module lsu ( .FetchBuffer, .CacheBusRW, .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); ahbcacheinterface #(.WORDSPERLINE(AHBWWORDSPERLINE), .LINELEN(LINELEN), .LOGWPL(AHBWLOGBWPL), .CACHE_ENABLED(`DCACHE)) ahbcacheinterface( - .HCLK(clk), .HRESETn(~reset), + .HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HRDATA, .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .WordCount, .SelBusWord, @@ -303,13 
+302,13 @@ module lsu ( logic CaptureEn; logic [1:0] BusRW; logic [`XLEN-1:0] FetchBuffer; - assign BusRW = ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; + assign BusRW = ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; // assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest} & ~{SelDTIM, SelDTIM}; assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; - ahbinterface #(1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(LSUHREADY), + ahbinterface #(1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM), .CPUBusy, .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv index 6530d12d6..d1476db88 100644 --- a/pipelined/src/lsu/lsuvirtmen.sv +++ b/pipelined/src/lsu/lsuvirtmen.sv @@ -39,7 +39,7 @@ module lsuvirtmem( output logic DTLBWriteM, input logic InstrDAPageFaultF, input logic DataDAPageFaultM, - input logic TrapM, + input logic FlushW, input logic DCacheStallM, input logic [`XLEN-1:0] SATP_REGW, // from csr input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, @@ -80,11 +80,13 @@ module lsuvirtmem( // move all the muxes to walkermux and instantiate these in lsu under virtmem_supported. 
assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); assign DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); - assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF & ~TrapM; - assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM & ~TrapM; + //assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF & ~TrapM; + assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF; + //assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM & ~TrapM; + assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM; hptw hptw( - .clk, .reset, .SATP_REGW, .PCF, .IEUAdrExtM, .MemRWM, .AtomicM, + .clk, .reset, .SATP_REGW, .PCF, .IEUAdrExtM, .MemRWM, .AtomicM, .FlushW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ITLBMissOrDAFaultNoTrapF, .DTLBMissOrDAFaultNoTrapM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, .HPTWReadPTE(ReadDataM), // *** should it be HPTWReadDataM diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index e2b2573ed..52f7e868e 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -37,6 +37,7 @@ module hptw input logic [`XLEN-1:0] PCF, // addresses to translate input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate input logic [1:0] MemRWM, AtomicM, + input logic FlushW, // system status input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, input logic [1:0] STATUS_MPP, @@ -217,7 +218,14 @@ module hptw end // Page Table Walker FSM - flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + // There is a bug here. Each memory access needs to be potentially flushed if the PMA/P checkers + // generate an access fault. Specifically, the store in UPDATE_PTE needs to be checked for an access violation. + // I think the solution is to do one of the following: + // 1. Allow the HPTW to generate exceptions and stop walking immediately. + // 2. If the store would generate an exception, don't store to dcache but still write the TLB. 
When we go back + // to LEAF then the PMA/P. Wait, this does not work. The PMA/P won't be looking at the address in the table, but + // rather at the physical address of the translated instruction/data. So we must generate the exception. + flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset | FlushW, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb case (WalkerState) IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; @@ -250,3 +258,7 @@ module hptw assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); endmodule + +// Another idea: We keep gating the control by ~TrapM, but this adds considerable length to the critical path. +// Should we do this differently? For example, TLBMiss is gated by ~TrapM and then drives HPTWStall, which drives LSUStallM, which drives +// the hazard unit to issue stall and flush controls. ~TrapM already suppresses these in the hazard unit. diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 277ca4266..bce17875a 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -170,7 +170,7 @@ module wallypipelinedcore ( ifu ifu( .clk, .reset, .StallF, .StallD, .StallE, .StallM, - .FlushF, .FlushD, .FlushE, .FlushM, + .FlushF, .FlushD, .FlushE, .FlushM, .FlushW, // Fetch .HRDATA, .PCF, .IFUHADDR, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, @@ -249,7 +249,7 @@ module wallypipelinedcore ( .FlushW, // CPU interface .MemRWM, .Funct3M, .Funct7M(InstrM[31:25]), - .AtomicM, .TrapM, + .AtomicM, .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, .FpLoadStoreM,