diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 897f0d181..0e399d19a 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -36,15 +36,15 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation - input logic [P.LLEN*2-1:0]ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed - input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched + input logic [P.LLEN*2-1:0]DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed + input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request input logic DataUpdateDAM, // ITLB miss, ignore memory request output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [P.LLEN-1:0] ReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [P.LLEN-1:0] DCacheReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1] typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; @@ -91,7 +91,7 @@ module align import cvw::*; #(parameter cvw_t P) ( end // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); + assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -108,7 +108,7 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign SelSpillM = (CurrState == STATE_SPILL); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM); + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall); assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -116,10 +116,10 @@ module align import cvw::*; #(parameter cvw_t P) ( //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM); + flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM); // merge together - mux2 #(2*P.LLEN) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); + mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); // align by shifting // *** optimize by merging with halfSpill, WordSpill, etc @@ -136,6 +136,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign DCacheReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index f2c7647eb..ab0b36d7d 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -92,6 +92,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit ); + localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer @@ -108,13 +109,18 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation + logic CacheBusHPWTStall; // Cache, bus, or hptw is requesting a stall + logic SelSpillE; // Align logic detected a spill and needs to stall logic CacheableM; // PMA indicates memory address is cacheable logic BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data - logic [P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + /* verilator lint_off WIDTHEXPAND */ + logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + /* verilator lint_on WIDTHEXPAND */ + logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection @@ -142,8 +148,19 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); - assign IEUAdrExtM = {2'b00, IEUAdrM}; - assign IEUAdrExtE = {2'b00, IEUAdrE}; + if(MISALIGN_SUPPORT) begin : ziccslm_align + logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; + align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, + .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); + assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; + assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; + end else begin : no_ziccslm_align + assign IEUAdrExtM = {2'b00, IEUAdrM}; + assign IEUAdrExtE = {2'b00, IEUAdrE}; + assign SelSpillE = '0; + assign DCacheReadDataWordSpillM = DCacheReadDataWordM; + end ///////////////////////////////////////////////////////////////////////////////////////////// // HPTW (only needed if VM supported) @@ -180,7 +197,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // the trap module. assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign LSUStallM = DCacheStallM | HPTWStall | BusStall; + assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; + assign LSUStallM = CacheBusHPWTStall | SelSpillE; ///////////////////////////////////////////////////////////////////////////////////////////// // MMU and misalignment fault logic required if privileged unit exists @@ -273,7 +291,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), - .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), + .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), @@ -290,7 +308,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM), .WriteDataM(LSUWriteDataM), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -300,7 +318,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. - mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), + mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface