diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 16d2d2da6..dfd64684b 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -34,6 +34,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE input logic clk, input logic reset, // cpu side + input logic Flush, input logic CPUBusy, input logic [1:0] CacheRW, input logic [1:0] CacheAtomic, @@ -125,11 +126,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0](.clk, .reset, .ce(SRAMEnable), .RAdr, .PAdr, .CacheWriteData, .LineByteMask, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, - .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, + .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Flush, .Invalidate(InvalidateCache)); if(NUMWAYS > 1) begin:vict cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( - .clk, .reset, .ce(SRAMEnable), .HitWay, .VictimWay, .RAdr, .LRUWriteEn); + .clk, .reset, .ce(SRAMEnable), .HitWay, .VictimWay, .RAdr, .LRUWriteEn(LRUWriteEn & ~Flush)); end else assign VictimWay = 1'b1; // one hot. 
assign CacheHit = | HitWay; assign VictimDirty = | VictimDirtyWay; @@ -206,7 +207,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Cache FSM ///////////////////////////////////////////////////////////////////////////////////////////// cachefsm cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, - .CacheRW, .CacheAtomic, .CPUBusy, + .Flush, .CacheRW, .CacheAtomic, .CPUBusy, .CacheHit, .VictimDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .ClearValid, .ClearDirty, .SetDirty, diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index ff5183b7f..edbf6f995 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -34,6 +34,7 @@ module cachefsm (input logic clk, input logic reset, // inputs from IEU + input logic Flush, input logic [1:0] CacheRW, input logic [1:0] CacheAtomic, input logic FlushCache, @@ -75,9 +76,8 @@ module cachefsm logic resetDelay; logic AMO; - logic DoAMO, DoRead, DoWrite, DoFlush; - logic DoAnyUpdateHit, DoAnyHit; - logic DoAnyMiss; + logic AnyUpdateHit, AnyHit; + logic AnyMiss; logic FlushFlag, FlushWayAndNotAdrFlag; typedef enum logic [3:0] {STATE_READY, // hit states @@ -94,19 +94,15 @@ module cachefsm (* mark_debug = "true" *) statetype CurrState, NextState; - assign DoFlush = FlushCache; assign AMO = CacheAtomic[1] & (&CacheRW); - assign DoAMO = AMO; - assign DoRead = CacheRW[1]; - assign DoWrite = CacheRW[0]; - assign DoAnyMiss = (DoAMO | DoRead | DoWrite) & ~CacheHit & ~InvalidateCache; - assign DoAnyUpdateHit = (DoAMO | DoWrite) & CacheHit; - assign DoAnyHit = DoAnyUpdateHit | (DoRead & CacheHit); + assign AnyMiss = (AMO | CacheRW[1] | CacheRW[0]) & ~CacheHit & ~InvalidateCache; + assign AnyUpdateHit = (AMO | CacheRW[0]) & CacheHit; + assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. 
- assign CacheAccess = (DoAMO | DoRead | DoWrite) & CurrState == STATE_READY; + assign CacheAccess = (AMO | CacheRW[1] | CacheRW[0]) & CurrState == STATE_READY; assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. When the fsm first exists reset the @@ -115,18 +111,18 @@ module cachefsm flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay)); always_ff @(posedge clk) - if (reset) CurrState <= #1 STATE_READY; + if (reset | Flush) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; always_comb begin NextState = STATE_READY; case (CurrState) STATE_READY: if(InvalidateCache) NextState = STATE_READY; - else if(DoFlush) NextState = STATE_FLUSH; + else if(FlushCache) NextState = STATE_FLUSH; // Delayed LRU update. Cannot check if victim line is dirty on this cycle. // To optimize do the fetch first, then eviction if necessary. - else if(DoAnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; - else if(DoAnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; + else if(AnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; + else if(AnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; else NextState = STATE_READY; STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_FETCH_WDV; @@ -155,7 +151,7 @@ module cachefsm // com back to CPU assign CacheCommitted = CurrState != STATE_READY; - assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAnyMiss)) | + assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
@@ -165,16 +161,16 @@ module cachefsm (CurrState == STATE_FLUSH_WRITE_BACK & ~(FlushFlag) & CacheBusAck); // write enables internal to cache assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; - assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) | + assign SetDirty = (CurrState == STATE_READY & AnyUpdateHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0])); assign ClearValid = '0; assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | (CurrState == STATE_FLUSH_WRITE_BACK & CacheBusAck); - assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | + assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | - (CurrState == STATE_READY & DoAnyMiss & VictimDirty); + (CurrState == STATE_READY & AnyMiss & VictimDirty); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK); assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag; @@ -185,11 +181,11 @@ module cachefsm assign FlushAdrCntRst = (CurrState == STATE_READY); assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls - assign CacheBusRW[1] = (CurrState == STATE_READY & DoAnyMiss & ~VictimDirty) | + assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~VictimDirty) | (CurrState == STATE_MISS_FETCH_WDV & ~CacheBusAck) | (CurrState == STATE_MISS_EVICT_DIRTY & CacheBusAck); -// assign CacheBusRW[1] = CurrState == STATE_READY & DoAnyMiss; - assign CacheBusRW[0] = (CurrState == STATE_READY & DoAnyMiss & VictimDirty) | +// assign CacheBusRW[1] = CurrState == STATE_READY & AnyMiss; + assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & VictimDirty) | (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | (CurrState == 
STATE_FLUSH_WRITE_BACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); @@ -197,7 +193,7 @@ module cachefsm // (CurrState == STATE_FLUSH_CHECK & VictimDirty); // **** can this be simplified? assign SelAdr = (CurrState == STATE_READY & ((AMO | CacheRW[0]) & CacheHit)) | // changes if store delay hazard removed - (CurrState == STATE_READY & (DoAnyMiss)) | + (CurrState == STATE_READY & (AnyMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 422ae98da..4776aeaf1 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -48,6 +48,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic VictimWay, input logic FlushWay, input logic Invalidate, + input logic Flush, // input logic [(`XLEN-1)/8:0] ByteMask, input logic [LINELEN/8-1:0] LineByteMask, @@ -86,7 +87,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce, .addr(RAdr), .dout(ReadTag), .bwe('1), - .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidWay)); + .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidWay & ~Flush)); // AND portion of distributed tag multiplexer mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag); @@ -109,7 +110,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .addr(RAdr), .dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), .din(CacheWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), - .we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); + .we(SelectedWriteWordEn & ~Flush), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); end // AND portion of distributed read 
multiplexers @@ -123,8 +124,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset | Invalidate) ValidBits <= #1 '0; if(ce) begin Valid <= #1 ValidBits[RAdr]; - if (SetValidWay) ValidBits[RAdr] <= #1 1'b1; - else if (ClearValidWay) ValidBits[RAdr] <= #1 1'b0; + if (SetValidWay & ~Flush) ValidBits[RAdr] <= #1 1'b1; + else if (ClearValidWay & ~Flush) ValidBits[RAdr] <= #1 1'b0; end end @@ -138,8 +139,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; if(ce) begin Dirty <= #1 DirtyBits[RAdr]; - if (SetDirtyWay) DirtyBits[RAdr] <= #1 1'b1; - else if (ClearDirtyWay) DirtyBits[RAdr] <= #1 1'b0; + if (SetDirtyWay & ~Flush) DirtyBits[RAdr] <= #1 1'b1; + else if (ClearDirtyWay & ~Flush) DirtyBits[RAdr] <= #1 1'b0; end end end else assign Dirty = 1'b0; diff --git a/pipelined/src/ebu/ahbcacheinterface.sv b/pipelined/src/ebu/ahbcacheinterface.sv index 5652cd023..4600edcd5 100644 --- a/pipelined/src/ebu/ahbcacheinterface.sv +++ b/pipelined/src/ebu/ahbcacheinterface.sv @@ -56,6 +56,7 @@ module ahbcacheinterface #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE input logic Cacheable, // lsu/ifu interface + input logic Flush, input logic [`PA_BITS-1:0] PAdr, input logic [1:0] BusRW, input logic CPUBusy, @@ -83,7 +84,7 @@ module ahbcacheinterface #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLE mux2 #(3) sizemux(.d0(Funct3), .d1(`XLEN == 32 ? 
3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); buscachefsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) AHBBuscachefsm( - .HCLK, .HRESETn, .BusRW, .CPUBusy, .BusCommitted, .BusStall, .CaptureEn, .SelBusWord, + .HCLK, .HRESETn, .Flush, .BusRW, .CPUBusy, .BusCommitted, .BusStall, .CaptureEn, .SelBusWord, .CacheBusRW, .CacheBusAck, .WordCount, .WordCountDelayed, .HREADY, .HTRANS, .HWRITE, .HBURST); endmodule diff --git a/pipelined/src/ebu/ahbinterface.sv b/pipelined/src/ebu/ahbinterface.sv index be2fbba53..9955cca5a 100644 --- a/pipelined/src/ebu/ahbinterface.sv +++ b/pipelined/src/ebu/ahbinterface.sv @@ -47,6 +47,7 @@ module ahbinterface #(parameter LSU = 0) // **** modify to use LSU/ifu parameter output logic [`XLEN/8-1:0] HWSTRB, // lsu/ifu interface + input logic Flush, input logic [1:0] BusRW, input logic [`XLEN/8-1:0] ByteMask, input logic [`XLEN-1:0] WriteData, @@ -71,7 +72,7 @@ module ahbinterface #(parameter LSU = 0) // **** modify to use LSU/ifu parameter assign HWSTRB = '0; end - busfsm busfsm(.HCLK, .HRESETn, .BusRW, + busfsm busfsm(.HCLK, .HRESETn, .Flush, .BusRW, .BusCommitted, .CPUBusy, .BusStall, .CaptureEn, .HREADY, .HTRANS, .HWRITE); endmodule diff --git a/pipelined/src/ebu/buscachefsm.sv b/pipelined/src/ebu/buscachefsm.sv index da8c0e259..6965c6737 100644 --- a/pipelined/src/ebu/buscachefsm.sv +++ b/pipelined/src/ebu/buscachefsm.sv @@ -38,6 +38,7 @@ module buscachefsm #(parameter integer WordCountThreshold, input logic HRESETn, // IEU interface + input logic Flush, input logic [1:0] BusRW, input logic CPUBusy, output logic BusCommitted, @@ -77,7 +78,7 @@ module buscachefsm #(parameter integer WordCountThreshold, logic CacheAccess; always_ff @(posedge HCLK) - if (~HRESETn) CurrState <= #1 ADR_PHASE; + if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; else CurrState <= #1 NextState; always_comb begin @@ -135,7 +136,7 @@ module buscachefsm #(parameter integer WordCountThreshold, assign BusCommitted = CurrState != ADR_PHASE; // AHB bus interface - assign 
HTRANS = (CurrState == ADR_PHASE & HREADY & (|BusRW | |CacheBusRW)) | + assign HTRANS = (CurrState == ADR_PHASE & HREADY & (|BusRW | |CacheBusRW) & ~Flush) | (CacheAccess & FinalWordCount & |CacheBusRW & HREADY) ? AHB_NONSEQ : // if we have a pipelined request (CacheAccess & |WordCount) ? (`BURST_EN ? AHB_SEQ : AHB_NONSEQ) : AHB_IDLE; diff --git a/pipelined/src/ebu/busfsm.sv b/pipelined/src/ebu/busfsm.sv index 336c3b4e2..8203fa74e 100644 --- a/pipelined/src/ebu/busfsm.sv +++ b/pipelined/src/ebu/busfsm.sv @@ -36,6 +36,7 @@ module busfsm input logic HRESETn, // IEU interface + input logic Flush, input logic [1:0] BusRW, input logic CPUBusy, output logic BusCommitted, @@ -55,8 +56,8 @@ module busfsm (* mark_debug = "true" *) busstatetype CurrState, NextState; always_ff @(posedge HCLK) - if (~HRESETn) CurrState <= #1 ADR_PHASE; - else CurrState <= #1 NextState; + if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; + else CurrState <= #1 NextState; always_comb begin case(CurrState) @@ -76,7 +77,7 @@ module busfsm assign BusCommitted = CurrState != ADR_PHASE; - assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW) ? AHB_NONSEQ : AHB_IDLE; + assign HTRANS = (CurrState == ADR_PHASE & HREADY & |BusRW & ~Flush) ? AHB_NONSEQ : AHB_IDLE; assign HWRITE = BusRW[0]; assign CaptureEn = CurrState == DATA_PHASE; diff --git a/pipelined/src/hazard/hazard.sv b/pipelined/src/hazard/hazard.sv index 0ca3c5bc4..39700ed0c 100644 --- a/pipelined/src/hazard/hazard.sv +++ b/pipelined/src/hazard/hazard.sv @@ -70,7 +70,7 @@ module hazard( // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap // assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)); // | FDivBusyE; assign StallMCause = ((wfiM) & (~TrapM & ~IntPendingM)); //*** Ross: should FDivBusyE trigger StallECause rather than StallMCause similar to DivBusyE? 
- assign StallWCause = LSUStallM | IFUStallF | (FDivBusyE & ~TrapM & ~IntPendingM); + assign StallWCause = ((IFUStallF | LSUStallM) & ~TrapM) | (FDivBusyE & ~TrapM & ~IntPendingM); assign #1 StallF = StallFCause | StallD; assign #1 StallD = StallDCause | StallE; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 8f632acb9..93804877d 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -34,7 +34,7 @@ module ifu ( input logic clk, reset, input logic StallF, StallD, StallE, StallM, - input logic FlushF, FlushD, FlushE, FlushM, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Bus interface (* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, (* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUHADDR, @@ -130,7 +130,7 @@ module ifu ( if(`C_SUPPORTED) begin : SpillSupport - spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF(InstrRawF), + spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .Flush(TrapM), .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF(InstrRawF), .InstrDAPageFaultF, .IFUCacheBusStallF, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpillSupport @@ -194,7 +194,7 @@ module ifu ( assign CommittedF = CacheCommittedF | BusCommittedF; logic IgnoreRequest; - assign IgnoreRequest = ITLBMissF | TrapM; + assign IgnoreRequest = ITLBMissF | FlushD; // The IROM uses untranslated addresses, so it is not compatible with virtual memory. if (`IROM_SUPPORTED) begin : irom @@ -215,12 +215,12 @@ module ifu ( logic [1:0] CacheBusRW, BusRW, CacheRWF; //assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableF, CacheableF} & ~{SelIROM, SelIROM}; - assign BusRW = ~IgnoreRequest & ~CacheableF & ~SelIROM ? IFURWF : '0; - assign CacheRWF = ~IgnoreRequest & CacheableF & ~SelIROM ? IFURWF : '0; + assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? 
IFURWF : '0; + assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0; cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) - icache(.clk, .reset, .CPUBusy, + icache(.clk, .reset, .Flush(FlushW), .CPUBusy, .FetchBuffer, .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheBusRW, @@ -237,7 +237,7 @@ module ifu ( ahbcacheinterface #(WORDSPERLINE, LINELEN, LOGBWPL, `ICACHE) ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), .HRDATA, - .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), + .Flush(FlushW), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .Funct3(3'b010), .HADDR(IFUHADDR), .HREADY(IFUHREADY), .HWRITE(IFUHWRITE), .CacheBusAdr(ICacheBusAdr), .WordCount(), .Cacheable(CacheableF), .SelBusWord(), .CacheBusAck(ICacheBusAck), @@ -252,11 +252,11 @@ module ifu ( logic CaptureEn; logic [31:0] FetchBuffer; logic [1:0] BusRW; - assign BusRW = ~IgnoreRequest & ~SelIROM ? IFURWF : '0; + assign BusRW = ~ITLBMissF & ~SelIROM ? 
IFURWF : '0; // assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{SelIROM, SelIROM}; assign IFUHSIZE = 3'b010; - ahbinterface #(0) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(IFUHREADY), + ahbinterface #(0) ahbinterface(.HCLK(clk), .Flush(FlushW), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), .CPUBusy, .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); diff --git a/pipelined/src/ifu/spillsupport.sv b/pipelined/src/ifu/spillsupport.sv index 56bd3a78c..b247c2d32 100644 --- a/pipelined/src/ifu/spillsupport.sv +++ b/pipelined/src/ifu/spillsupport.sv @@ -35,7 +35,7 @@ module spillsupport #(parameter CACHE_ENABLED) (input logic clk, input logic reset, - input logic StallF, + input logic StallF, Flush, input logic [`XLEN-1:0] PCF, input logic [`XLEN-3:0] PCPlusUpperF, input logic [`XLEN-1:0] PCNextF, @@ -61,7 +61,7 @@ module spillsupport #(parameter CACHE_ENABLED) mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlusUpperF, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); - mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF), + mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~Flush), .y(PCNextFSpill)); mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill)); @@ -69,7 +69,7 @@ module spillsupport #(parameter CACHE_ENABLED) assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF)); always_ff @(posedge clk) - if (reset) CurrState <= #1 STATE_READY; + if (reset | Flush) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; always_comb begin @@ -89,7 +89,7 @@ module spillsupport #(parameter CACHE_ENABLED) assign SavedInstr = CACHE_ENABLED ? 
InstrRawF[15:0] : InstrRawF[31:16]; flopenr #(16) SpillInstrReg(.clk(clk), - .en(SpillSaveF), + .en(SpillSaveF & ~Flush), .reset(reset), .d(SavedInstr), .q(SpillDataLine0)); diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv index 45ada1c6c..8f9918835 100644 --- a/pipelined/src/lsu/dtim.sv +++ b/pipelined/src/lsu/dtim.sv @@ -30,13 +30,13 @@ `include "wally-config.vh" module dtim( - input logic clk, reset, ce, - input logic [1:0] MemRWM, - input logic [`PA_BITS-1:0] Adr, - input logic TrapM, - input logic [`LLEN-1:0] WriteDataM, - input logic [`LLEN/8-1:0] ByteMaskM, - output logic [`LLEN-1:0] ReadDataWordM + input logic clk, reset, ce, + input logic [1:0] MemRWM, + input logic [`PA_BITS-1:0] Adr, + input logic FlushW, + input logic [`LLEN-1:0] WriteDataM, + input logic [`LLEN/8-1:0] ByteMaskM, + output logic [`LLEN-1:0] ReadDataWordM ); logic we; @@ -44,7 +44,7 @@ module dtim( localparam ADDR_WDITH = $clog2(`DTIM_RANGE/8); localparam OFFSET = $clog2(`LLEN/8); - assign we = MemRWM[0] & ~TrapM; // have to ignore write if Trap. + assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap. 
sram1p1rw #(.DEPTH(`DTIM_RANGE/8), .WIDTH(`LLEN)) ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index d469e6721..5be7c9311 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -47,7 +47,6 @@ module lsu ( input logic [2:0] Funct3M, input logic [6:0] Funct7M, input logic [1:0] AtomicM, - input logic TrapM, input logic FlushDCacheM, output logic CommittedM, output logic SquashSCW, @@ -131,7 +130,7 @@ module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED lsuvirtmem lsuvirtmem(.clk, .reset, .StallW, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, - .TrapM, .DCacheStallM, .SATP_REGW, .PCF, + .FlushW, .DCacheStallM, .SATP_REGW, .PCF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, .IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, @@ -203,7 +202,7 @@ module lsu ( logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; logic [`LLEN-1:0] ReadDataWordMuxM, DTIMReadDataWordM, DCacheReadDataWordM; logic IgnoreRequest; - assign IgnoreRequest = IgnoreRequestTLB | TrapM; + assign IgnoreRequest = IgnoreRequestTLB | FlushW; if (`DTIM_SUPPORTED) begin : dtim logic [`PA_BITS-1:0] DTIMAdr; @@ -211,12 +210,12 @@ module lsu ( // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. assign DTIMAdr = MemRWM[0] ? IEUAdrExtM[`PA_BITS-1:0] : IEUAdrExtE[`PA_BITS-1:0]; // zero extend or contract to PA_BITS - assign DTIMMemRWM = SelDTIM & ~IgnoreRequest ? LSURWM : '0; + assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. 
dtim dtim(.clk, .reset, .ce(~CPUBusy), .MemRWM(DTIMMemRWM), .Adr(DTIMAdr), - .TrapM, .WriteDataM(LSUWriteDataM), + .FlushW, .WriteDataM(LSUWriteDataM), .ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); end else begin end @@ -242,15 +241,15 @@ module lsu ( logic [1:0] CacheRWM, CacheAtomicM; logic CacheFlushM; - assign BusRW = ~CacheableM & ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; + assign BusRW = ~CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; - assign CacheRWM = CacheableM & ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; - assign CacheAtomicM = CacheableM & ~IgnoreRequest & ~SelDTIM ? LSUAtomicM : '0; - assign CacheFlushM = ~TrapM & FlushDCacheM; + assign CacheRWM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; + assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSUAtomicM : '0; + assign CacheFlushM = FlushDCacheM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .DCACHE(1)) dcache( - .clk, .reset, .CPUBusy, .SelBusWord, .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), + .clk, .reset, .CPUBusy, .SelBusWord, .Flush(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(CacheFlushM), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .WordCount(WordCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .FinalWriteData(LSUWriteDataM), .SelHPTW, @@ -260,7 +259,7 @@ module lsu ( .FetchBuffer, .CacheBusRW, .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); ahbcacheinterface #(.WORDSPERLINE(AHBWWORDSPERLINE), .LINELEN(LINELEN), .LOGWPL(AHBWLOGBWPL), .CACHE_ENABLED(`DCACHE)) ahbcacheinterface( - .HCLK(clk), .HRESETn(~reset), + .HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HRDATA, .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), .WordCount, .SelBusWord, @@ -303,13 
+302,13 @@ module lsu ( logic CaptureEn; logic [1:0] BusRW; logic [`XLEN-1:0] FetchBuffer; - assign BusRW = ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; + assign BusRW = ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; // assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest} & ~{SelDTIM, SelDTIM}; assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; - ahbinterface #(1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(LSUHREADY), + ahbinterface #(1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM), .CPUBusy, .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv index 6530d12d6..d1476db88 100644 --- a/pipelined/src/lsu/lsuvirtmen.sv +++ b/pipelined/src/lsu/lsuvirtmen.sv @@ -39,7 +39,7 @@ module lsuvirtmem( output logic DTLBWriteM, input logic InstrDAPageFaultF, input logic DataDAPageFaultM, - input logic TrapM, + input logic FlushW, input logic DCacheStallM, input logic [`XLEN-1:0] SATP_REGW, // from csr input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, @@ -80,11 +80,13 @@ module lsuvirtmem( // move all the muxes to walkermux and instantiate these in lsu under virtmem_supported. 
assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); assign DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); - assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF & ~TrapM; - assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM & ~TrapM; + //assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF & ~TrapM; + assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF; + //assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM & ~TrapM; + assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM; hptw hptw( - .clk, .reset, .SATP_REGW, .PCF, .IEUAdrExtM, .MemRWM, .AtomicM, + .clk, .reset, .SATP_REGW, .PCF, .IEUAdrExtM, .MemRWM, .AtomicM, .FlushW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ITLBMissOrDAFaultNoTrapF, .DTLBMissOrDAFaultNoTrapM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, .HPTWReadPTE(ReadDataM), // *** should it be HPTWReadDataM diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index e2b2573ed..52f7e868e 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -37,6 +37,7 @@ module hptw input logic [`XLEN-1:0] PCF, // addresses to translate input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate input logic [1:0] MemRWM, AtomicM, + input logic FlushW, // system status input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, input logic [1:0] STATUS_MPP, @@ -217,7 +218,14 @@ module hptw end // Page Table Walker FSM - flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + // there is a bug here. Each memory access needs to be potentially flushed if the PMA/P checkers + // generate an access fault. Specifically the store on UPDATE_PTE needs to check for access violation. + // I think the solution is to do 1 of the following + // 1. Allow the HPTW to generate exceptions and stop walking immediately. + // 2. If the store would generate an exception don't store to dcache but still write the TLB. 
When we go back + // to LEAF then the PMA/P. Wait, this does not work. The PMA/P won't be looking at the address in the table, but + // rather at the physical address of the translated instruction/data. So we must generate the exception. + flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset | FlushW, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb case (WalkerState) IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; @@ -250,3 +258,7 @@ module hptw assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); endmodule + +// another idea. We keep gating the control by ~TrapM, but this adds considerable length to the critical path. +// should we do this differently? For example TLBMiss is gated by ~TrapM and then drives HPTWStall, which drives LSUStallM, which drives +// the hazard unit to issue stall and flush controls. ~TrapM already suppresses these in the hazard unit. diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 277ca4266..bce17875a 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -170,7 +170,7 @@ module wallypipelinedcore ( ifu ifu( .clk, .reset, .StallF, .StallD, .StallE, .StallM, - .FlushF, .FlushD, .FlushE, .FlushM, + .FlushF, .FlushD, .FlushE, .FlushM, .FlushW, // Fetch .HRDATA, .PCF, .IFUHADDR, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, @@ -249,7 +249,7 @@ module wallypipelinedcore ( .FlushW, // CPU interface .MemRWM, .Funct3M, .Funct7M(InstrM[31:25]), - .AtomicM, .TrapM, + .AtomicM, .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, .FpLoadStoreM,