diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b3e810ee2..8cae76a02 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -35,9 +35,11 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic StallM, FlushM, input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM + input logic [2:0] Funct3M, // Funct3M[1:0] is the size of the memory operation input logic [31:0] ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request + input logic DataUpdateDAM, // Hardware update to the D/A bits, don't take the spill output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill @@ -49,10 +51,10 @@ module align import cvw::*; #(parameter cvw_t P) ( statetype CurrState, NextState; logic TakeSpillM, TakeSpillE; - logic SpillF; + logic SpillM; logic SelSpillF; logic SpillSaveF; - logic [15:0] InstrFirstHalfF; + logic [P.LLEN-9:0] ReadDataWordFirstHalfM; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic @@ -71,19 +73,23 @@ module align import cvw::*; #(parameter cvw_t P) ( // spill detection in lsu is more complex than ifu, depends on 3 factors // 1) operation size // 2) offset - // 3) access location within the cacheline or is the access is uncached. 
- // first consider uncached operations - // accesses are always aligned to the natural size of the bus (XLEN or AHBW) - - if (P.ICACHE_SUPPORTED) begin - logic SpillCachedF, SpillUncachedF; - assign SpillCachedF = &IEUAdrM[$clog2(P.ICACHE_LINELENINBITS/32)+1:1]; - assign SpillUncachedF = IEUAdrM[1]; // *** try to optimize this based on whether the next instruction is 16 bits and by fetching 64 bits in RV64 - assign SpillF = CacheableF ? SpillCachedF : SpillUncachedF; - end else - assign SpillF = IEUAdrM[1]; // *** might relax - only spill if next instruction is uncompressed + // 3) access location within the cacheline + logic [$clog2(P.DCACHE_LINELENINBITS/8)-1:$clog2(P.LLEN/8)] WordOffsetM; // index of the LLEN-wide word within the cache line + logic [$clog2(P.LLEN/8)-1:0] ByteOffsetM; // byte offset within that word + logic HalfSpillM, WordSpillM; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[$clog2(P.DCACHE_LINELENINBITS/8)-1:0]; // address bits that index a byte within the cache line + assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; // halfword access, last word of the line, odd byte offset + assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; // word access, last word of the line, not 4-byte aligned + if(P.LLEN == 64) begin + logic DoubleSpillM; + assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b000; // doubleword access, last word of the line, not 8-byte aligned + assign SpillM = HalfSpillM | WordSpillM | DoubleSpillM; + end else begin + assign SpillM = HalfSpillM | WordSpillM; + end + // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF)); + assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -91,7 +97,7 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; + STATE_READY: if (TakeSpillM) NextState = STATE_SPILL; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; @@ -99,16 +105,16 
@@ module align import cvw::*; #(parameter cvw_t P) ( endcase end - assign SelSpillF = (CurrState == STATE_SPILL); - assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallF); - assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushM; + assign SelSpillM = (CurrState == STATE_SPILL); // high while the FSM is in STATE_SPILL + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM); // high when a spill is being taken, or while stalled in STATE_SPILL + assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled instruction //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF); + flopenr #(P.LLEN-8) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:8], ReadDataWordFirstHalfM); // merge together mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 191599f12..8dc843a38 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -234,6 +234,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. + // **** add support for cboz dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0])); @@ -268,8 +269,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign CacheAtomicM = CacheableM & ~SelDTIM ? 
LSUAtomicM : '0; assign FlushDCache = FlushDCacheM & ~(SelHPTW); - // *** need RT to add support for CMOpM and LSUPrefetchM (DH 7/2/23) - // *** prefetch can just act as a read operation cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), @@ -285,6 +284,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DCacheStallM = CacheStall & ~IgnoreRequestTLB; assign CacheBusRW = CacheBusRWTemp; + // *** add support for cboz ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index ae3e3c78b..e5666eb84 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -29,125 +29,22 @@ module subwordread #(parameter LLEN) ( - input logic [LLEN-1:0] ReadDataWordMuxM, - input logic [$clog(LLEN/8)-1:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN/2-1:0] ReadDataM + input logic [LLEN-1:0] ReadDataWordMuxM, // LLEN-bit word the byte/halfword/word is selected from + input logic [2:0] PAdrM, // Low address bits; XORed with BigEndianM to form the subword select + input logic [2:0] Funct3M, // [1:0] is the access size, [2] is the unsigned bit + input logic FpLoadStoreM, // ORed into the extension bits of lh/lw/ld results (NaN boxing) + input logic BigEndianM, // When set, inverts all three bits of the select offset + output logic [LLEN-1:0] ReadDataM ); - localparam OFFSET_LEN = $clog(LLEN/8); - localparam HLEN = LLEN/2; logic [7:0] ByteM; logic [15:0] HalfwordM; - logic [OFFSET_LEN-1:0] PAdrSwap; + logic [2:0] PAdrSwap; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. 
- assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; + assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; // flip all three offset bits when BigEndianM is set - if (LLEN == 128) begin:swrmux - // ByteMe mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: ByteM = ReadDataWordMuxM[7:0]; - 4'b0001: ByteM = ReadDataWordMuxM[15:8]; - 4'b0010: ByteM = ReadDataWordMuxM[23:16]; - 4'b0011: ByteM = ReadDataWordMuxM[31:24]; - 4'b0100: ByteM = ReadDataWordMuxM[39:32]; - 4'b0101: ByteM = ReadDataWordMuxM[47:40]; - 4'b0110: ByteM = ReadDataWordMuxM[55:48]; - 4'b0111: ByteM = ReadDataWordMuxM[63:56]; - 4'b1000: ByteM = ReadDataWordMuxM[71:64]; - 4'b1001: ByteM = ReadDataWordMuxM[79:72]; - 4'b1010: ByteM = ReadDataWordMuxM[87:80]; - 4'b1011: ByteM = ReadDataWordMuxM[95:88]; - 4'b1100: ByteM = ReadDataWordMuxM[103:96]; - 4'b1101: ByteM = ReadDataWordMuxM[111:104]; - 4'b1110: ByteM = ReadDataWordMuxM[119:112]; - 4'b1111: ByteM = ReadDataWordMuxM[127:120]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; - 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; - 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; - 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; - 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; - 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; - 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; - 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; - 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; - 4'b1001: HalfwordM = ReadDataWordMuxM[87:72]; - 4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; - 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; - 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; - 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; - 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; - //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - endcase - - logic [31:0] WordM; - - always_comb - 
case(PAdrSwap[3:0]) - 4'b0000: WordM = ReadDataWordMuxM[31:0]; - 4'b0001: WordM = ReadDataWordMuxM[39:8]; - 4'b0010: WordM = ReadDataWordMuxM[47:16]; - 4'b0011: WordM = ReadDataWordMuxM[55:24]; - 4'b0100: WordM = ReadDataWordMuxM[63:32]; - 4'b0101: WordM = ReadDataWordMuxM[71:40]; - 4'b0111: WordM = ReadDataWordMuxM[79:48]; - 4'b1000: WordM = ReadDataWordMuxM[87:56]; - 4'b1001: WordM = ReadDataWordMuxM[95:64]; - 4'b1010: WordM = ReadDataWordMuxM[103:72]; - 4'b1011: WordM = ReadDataWordMuxM[111:80]; - 4'b1011: WordM = ReadDataWordMuxM[119:88]; - 4'b1100: WordM = ReadDataWordMuxM[127:96]; - 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; - endcase - - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; - 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; - 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; - 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; - 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; - 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; - 4'b1011: DblWordMM = {24'b0, ReadDataWordMuxM[119:88]}; - 4'b1100: DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = 
{{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux + if (LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -163,55 +60,35 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[2:0]) - 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; - 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; - 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; - 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; - 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; - 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; - 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; - 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; + case(PAdrSwap[2:1]) // PAdrSwap[0] is not used: picks one of four 16-bit aligned halfwords + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; + 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; endcase logic [31:0] WordM; always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + case(PAdrSwap[2]) // only PAdrSwap[2] is used: picks the lower or upper 32-bit word + 1'b0: WordM = ReadDataWordMuxM[31:0]; + 1'b1: WordM = ReadDataWordMuxM[63:32]; endcase logic [63:0] DblWordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; - 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; - 3'b010: DblWordMM = 
{16'b0, ReadDataWordMuxM[63:16]}; - 3'b011: DblWordMM = {24'b0, ReadDataWordMuxM[63:24]}; - 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; - 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; - 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; - 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; - endcase + assign DblWordM = ReadDataWordMuxM[63:0]; // sign extension/ NaN boxing always_comb case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb: sign-extend the selected byte + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh: sign-extend the halfword; upper bits are all ones when FpLoadStoreM is set + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw: sign-extend the word; upper bits are all ones when FpLoadStoreM is set + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld: replication count is zero in this LLEN == 64 branch + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu: zero-extend the selected byte + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu: zero-extend the selected halfword + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase @@ -227,22 +104,20 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[1:0]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; - 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; + case(PAdrSwap[1]) // only PAdrSwap[1] is used: picks the lower or upper 16-bit halfword + 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; + 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; endcase // sign extension always_comb case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb: sign-extend the selected byte + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh: sign-extend the halfword; upper bits are all ones when FpLoadStoreM is set + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu: zero-extend the selected byte + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase end