mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge pull request #560 from ross144/main
Removed the unnecessary spill for a compressed instruction that sits at the end of a cache line or is fetched by an uncached access. Improves CoreMark from 2.97 to 2.99.
This commit is contained in:
commit
c62c351aa7
@ -48,13 +48,12 @@ module ahbinterface #(
|
|||||||
input logic [XLEN-1:0] WriteData, // IEU write data for a store
|
input logic [XLEN-1:0] WriteData, // IEU write data for a store
|
||||||
output logic BusStall, // Bus is busy with an in flight memory operation
|
output logic BusStall, // Bus is busy with an in flight memory operation
|
||||||
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
|
output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt
|
||||||
output logic [(LSU ? XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
|
output logic [XLEN-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus
|
||||||
);
|
);
|
||||||
|
|
||||||
logic CaptureEn;
|
logic CaptureEn;
|
||||||
localparam LEN = (LSU ? XLEN : 32); // 32 bits for IFU, XLEN for LSU
|
|
||||||
|
|
||||||
flopen #(LEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA[LEN-1:0]), .q(FetchBuffer));
|
flopen #(XLEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA), .q(FetchBuffer));
|
||||||
|
|
||||||
if(LSU) begin
|
if(LSU) begin
|
||||||
// delay HWDATA by 1 cycle per spec; assumes AHBW = XLEN
|
// delay HWDATA by 1 cycle per spec; assumes AHBW = XLEN
|
||||||
|
@ -99,6 +99,7 @@ module ifu import cvw::*; #(parameter cvw_t P) (
|
|||||||
);
|
);
|
||||||
|
|
||||||
localparam [31:0] nop = 32'h00000013; // instruction for NOP
|
localparam [31:0] nop = 32'h00000013; // instruction for NOP
|
||||||
|
localparam LINELEN = P.ICACHE_SUPPORTED ? P.ICACHE_LINELENINBITS : P.XLEN;
|
||||||
|
|
||||||
logic [P.XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4
|
logic [P.XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4
|
||||||
logic [P.XLEN-1:0] PC1NextF; // Branch predictor next PCF
|
logic [P.XLEN-1:0] PC1NextF; // Branch predictor next PCF
|
||||||
@ -136,6 +137,8 @@ module ifu import cvw::*; #(parameter cvw_t P) (
|
|||||||
logic CacheCommittedF; // I$ memory operation started, delay interrupts
|
logic CacheCommittedF; // I$ memory operation started, delay interrupts
|
||||||
logic SelIROM; // PMA indicates instruction address is in the IROM
|
logic SelIROM; // PMA indicates instruction address is in the IROM
|
||||||
logic [15:0] InstrRawE, InstrRawM;
|
logic [15:0] InstrRawE, InstrRawM;
|
||||||
|
logic [LINELEN-1:0] FetchBuffer;
|
||||||
|
logic [31:0] ShiftUncachedInstr;
|
||||||
|
|
||||||
assign PCFExt = {2'b00, PCSpillF};
|
assign PCFExt = {2'b00, PCSpillF};
|
||||||
|
|
||||||
@ -225,9 +228,7 @@ module ifu import cvw::*; #(parameter cvw_t P) (
|
|||||||
localparam LOGBWPL = P.ICACHE_SUPPORTED ? $clog2(WORDSPERLINE) : 1;
|
localparam LOGBWPL = P.ICACHE_SUPPORTED ? $clog2(WORDSPERLINE) : 1;
|
||||||
|
|
||||||
if(P.ICACHE_SUPPORTED) begin : icache
|
if(P.ICACHE_SUPPORTED) begin : icache
|
||||||
localparam LINELEN = P.ICACHE_SUPPORTED ? P.ICACHE_LINELENINBITS : P.XLEN;
|
|
||||||
localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
|
localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
|
||||||
logic [LINELEN-1:0] FetchBuffer;
|
|
||||||
logic [P.PA_BITS-1:0] ICacheBusAdr;
|
logic [P.PA_BITS-1:0] ICacheBusAdr;
|
||||||
logic ICacheBusAck;
|
logic ICacheBusAck;
|
||||||
logic [1:0] CacheBusRW, BusRW, CacheRWF;
|
logic [1:0] CacheBusRW, BusRW, CacheRWF;
|
||||||
@ -264,16 +265,10 @@ module ifu import cvw::*; #(parameter cvw_t P) (
|
|||||||
.BusRW, .Stall(GatedStallD),
|
.BusRW, .Stall(GatedStallD),
|
||||||
.BusStall, .BusCommitted(BusCommittedF));
|
.BusStall, .BusCommitted(BusCommittedF));
|
||||||
|
|
||||||
logic [31:0] ShiftUncachedInstr;
|
|
||||||
|
|
||||||
if(P.XLEN == 64) mux4 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], FetchBuffer[48-1:16], FetchBuffer[64-1:32], {16'b0, FetchBuffer[64-1:48]},
|
|
||||||
PCSpillF[2:1], ShiftUncachedInstr);
|
|
||||||
else mux2 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], {16'b0, FetchBuffer[32-1:16]}, PCSpillF[1], ShiftUncachedInstr);
|
|
||||||
mux3 #(32) UnCachedDataMux(.d0(ICacheInstrF), .d1(ShiftUncachedInstr), .d2(IROMInstrF),
|
mux3 #(32) UnCachedDataMux(.d0(ICacheInstrF), .d1(ShiftUncachedInstr), .d2(IROMInstrF),
|
||||||
.s({SelIROM, ~CacheableF}), .y(InstrRawF[31:0]));
|
.s({SelIROM, ~CacheableF}), .y(InstrRawF[31:0]));
|
||||||
end else begin : passthrough
|
end else begin : passthrough
|
||||||
assign IFUHADDR = PCPF;
|
assign IFUHADDR = PCPF;
|
||||||
logic [31:0] FetchBuffer;
|
|
||||||
logic [1:0] BusRW;
|
logic [1:0] BusRW;
|
||||||
assign BusRW = ~ITLBMissF & ~SelIROM ? IFURWF : '0;
|
assign BusRW = ~ITLBMissF & ~SelIROM ? IFURWF : '0;
|
||||||
assign IFUHSIZE = 3'b010;
|
assign IFUHSIZE = 3'b010;
|
||||||
@ -284,8 +279,8 @@ module ifu import cvw::*; #(parameter cvw_t P) (
|
|||||||
.Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer));
|
.Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer));
|
||||||
|
|
||||||
assign CacheCommittedF = '0;
|
assign CacheCommittedF = '0;
|
||||||
if(P.IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF);
|
if(P.IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(ShiftUncachedInstr, IROMInstrF, SelIROM, InstrRawF);
|
||||||
else assign InstrRawF = FetchBuffer;
|
else assign InstrRawF = ShiftUncachedInstr;
|
||||||
assign IFUHBURST = 3'b0;
|
assign IFUHBURST = 3'b0;
|
||||||
assign {ICacheMiss, ICacheAccess, ICacheStallF} = '0;
|
assign {ICacheMiss, ICacheAccess, ICacheStallF} = '0;
|
||||||
end
|
end
|
||||||
@ -295,6 +290,11 @@ module ifu import cvw::*; #(parameter cvw_t P) (
|
|||||||
assign InstrRawF = IROMInstrF;
|
assign InstrRawF = IROMInstrF;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
// mux between the alignments of uncached reads.
|
||||||
|
if(P.XLEN == 64) mux4 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], FetchBuffer[48-1:16], FetchBuffer[64-1:32], {16'b0, FetchBuffer[64-1:48]},
|
||||||
|
PCSpillF[2:1], ShiftUncachedInstr);
|
||||||
|
else mux2 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], {16'b0, FetchBuffer[32-1:16]}, PCSpillF[1], ShiftUncachedInstr);
|
||||||
|
|
||||||
assign IFUCacheBusStallF = ICacheStallF | BusStall;
|
assign IFUCacheBusStallF = ICacheStallF | BusStall;
|
||||||
assign IFUStallF = IFUCacheBusStallF | SelSpillNextF;
|
assign IFUStallF = IFUCacheBusStallF | SelSpillNextF;
|
||||||
assign GatedStallD = StallD & ~SelSpillNextF;
|
assign GatedStallD = StallD & ~SelSpillNextF;
|
||||||
|
@ -57,6 +57,7 @@ module spill import cvw::*; #(parameter cvw_t P) (
|
|||||||
logic SelSpillF;
|
logic SelSpillF;
|
||||||
logic SpillSaveF;
|
logic SpillSaveF;
|
||||||
logic [15:0] InstrFirstHalfF;
|
logic [15:0] InstrFirstHalfF;
|
||||||
|
logic EarlyCompressedF;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// PC logic
|
// PC logic
|
||||||
@ -79,12 +80,12 @@ module spill import cvw::*; #(parameter cvw_t P) (
|
|||||||
if (P.ICACHE_SUPPORTED) begin
|
if (P.ICACHE_SUPPORTED) begin
|
||||||
logic SpillCachedF, SpillUncachedF;
|
logic SpillCachedF, SpillUncachedF;
|
||||||
assign SpillCachedF = &PCF[$clog2(P.ICACHE_LINELENINBITS/32)+1:1];
|
assign SpillCachedF = &PCF[$clog2(P.ICACHE_LINELENINBITS/32)+1:1];
|
||||||
assign SpillUncachedF = PCF[1]; // *** try to optimize this based on whether the next instruction is 16 bits and by fetching 64 bits in RV64
|
assign SpillUncachedF = PCF[1];
|
||||||
assign SpillF = CacheableF ? SpillCachedF : SpillUncachedF;
|
assign SpillF = (CacheableF ? SpillCachedF : SpillUncachedF);
|
||||||
end else
|
end else
|
||||||
assign SpillF = PCF[1]; // *** might relax - only spill if next instruction is uncompressed
|
assign SpillF = PCF[1];
|
||||||
// Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits
|
// Don't take the spill if the first halfword is already a complete compressed instruction, or if there is a stall, TLB miss, or hardware update to the D/A bits
|
||||||
assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF));
|
assign TakeSpillF = SpillF & ~EarlyCompressedF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF));
|
||||||
|
|
||||||
always_ff @(posedge clk)
|
always_ff @(posedge clk)
|
||||||
if (reset | FlushD) CurrState <= #1 STATE_READY;
|
if (reset | FlushD) CurrState <= #1 STATE_READY;
|
||||||
@ -112,11 +113,12 @@ module spill import cvw::*; #(parameter cvw_t P) (
|
|||||||
flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF);
|
flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF);
|
||||||
|
|
||||||
// merge together
|
// merge together
|
||||||
mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF);
|
mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SelSpillF, PostSpillInstrRawF);
|
||||||
|
|
||||||
// Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x
|
// Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x
|
||||||
always_comb
|
always_comb
|
||||||
if (PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1;
|
if (PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1;
|
||||||
else CompressedF = 1'b0;
|
else CompressedF = 1'b0;
|
||||||
|
assign EarlyCompressedF = ~(&InstrRawF[1:0]);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
Loading…
Reference in New Issue
Block a user