At least have the aligner integrated, but not tested.

This commit is contained in:
Rose Thompson 2023-10-27 13:55:16 -05:00
parent 657409aec5
commit 36ca64c567
2 changed files with 33 additions and 15 deletions

View File

@ -36,15 +36,15 @@ module align import cvw::*; #(parameter cvw_t P) (
input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage
input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM
input logic [2:0] Funct3M, // Size of memory operation
input logic [P.LLEN*2-1:0]ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed
input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
input logic [P.LLEN*2-1:0]DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed
input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
input logic DTLBMissM, // ITLB miss, ignore memory request
input logic DataUpdateDAM, // ITLB miss, ignore memory request
output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill
output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill
output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline
output logic [P.LLEN-1:0] ReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
output logic [P.LLEN-1:0] DCacheReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
// Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1]
typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype;
@ -91,7 +91,7 @@ module align import cvw::*; #(parameter cvw_t P) (
end
// Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits
assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM));
assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM));
always_ff @(posedge clk)
if (reset | FlushM) CurrState <= #1 STATE_READY;
@ -108,7 +108,7 @@ module align import cvw::*; #(parameter cvw_t P) (
end
assign SelSpillM = (CurrState == STATE_SPILL);
assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM);
assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall);
assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM;
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -116,10 +116,10 @@ module align import cvw::*; #(parameter cvw_t P) (
////////////////////////////////////////////////////////////////////////////////////////////////////
// save the first 2 bytes
flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM);
flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM);
// merge together
mux2 #(2*P.LLEN) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM);
mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM);
// align by shifting
// *** optimize by merging with halfSpill, WordSpill, etc
@ -136,6 +136,6 @@ module align import cvw::*; #(parameter cvw_t P) (
// shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit)
// 8 * is for shifting by bytes not bits
assign ReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0);
assign DCacheReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0);
endmodule

View File

@ -92,6 +92,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit
input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit
);
localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED;
logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer
@ -108,13 +109,18 @@ module lsu import cvw::*; #(parameter cvw_t P) (
logic BusStall; // Bus interface busy with multicycle operation
logic HPTWStall; // HPTW busy with multicycle operation
logic CacheBusHPWTStall; // Cache, bus, or hptw is requesting a stall
logic SelSpillE; // Align logic detected a spill and needs to stall
logic CacheableM; // PMA indicates memory address is cacheable
logic BusCommittedM; // Bus memory operation in flight, delay interrupts
logic DCacheCommittedM; // D$ memory operation started, delay interrupts
logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data
logic [P.LLEN-1:0] DCacheReadDataWordM; // D$ read data
/* verilator lint_off WIDTHEXPAND */
logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data
/* verilator lint_on WIDTHEXPAND */
logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data
logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data
logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection
@ -142,8 +148,19 @@ module lsu import cvw::*; #(parameter cvw_t P) (
/////////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM);
if(MISALIGN_SUPPORT) begin : ziccslm_align
logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM;
align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M,
.DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM,
.IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM);
assign IEUAdrExtM = {2'b00, IEUAdrSpillM};
assign IEUAdrExtE = {2'b00, IEUAdrSpillE};
end else begin : no_ziccslm_align
assign IEUAdrExtM = {2'b00, IEUAdrM};
assign IEUAdrExtE = {2'b00, IEUAdrE};
assign SelSpillE = '0;
assign DCacheReadDataWordSpillM = DCacheReadDataWordM;
end
/////////////////////////////////////////////////////////////////////////////////////////////
// HPTW (only needed if VM supported)
@ -180,7 +197,8 @@ module lsu import cvw::*; #(parameter cvw_t P) (
// the trap module.
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM;
assign GatedStallW = StallW & ~SelHPTW;
assign LSUStallM = DCacheStallM | HPTWStall | BusStall;
assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall;
assign LSUStallM = CacheBusHPWTStall | SelSpillE;
/////////////////////////////////////////////////////////////////////////////////////////////
// MMU and misalignment fault logic required if privileged unit exists
@ -273,7 +291,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache(
.clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM),
.FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM),
.FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM),
.ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]),
.CacheWriteData(LSUWriteDataM), .SelHPTW,
.CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
@ -290,7 +308,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
.HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB),
.HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB),
.HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY),
.BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM), .WriteDataM(LSUWriteDataM),
.BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM),
.Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheableOrFlushCacheM,
.CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM),
.Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW),
@ -300,7 +318,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
// Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times.
// *** DTIMReadDataWordM should be increased to LLEN.
// pma should generate exception for LLEN read to periph.
mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}),
mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}),
.d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}),
.s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM));
end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface