Optimization of cache save/restore.

This commit is contained in:
Ross Thompson 2022-02-04 14:18:10 -06:00
parent 7c1f7e335c
commit 459054900f
2 changed files with 24 additions and 22 deletions

View File

@ -107,6 +107,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
logic SelFlush; logic SelFlush;
logic ResetOrFlushAdr, ResetOrFlushWay; logic ResetOrFlushAdr, ResetOrFlushWay;
logic save, restore; logic save, restore;
logic [NUMWAYS-1:0] WayHitSaved, WayHitRaw;
logic [LINELEN-1:0] ReadDataLineRaw, ReadDataLineSaved;
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Read Path // Read Path
@ -127,8 +129,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
.TagWriteEnable(SRAMLineWayWriteEnable), .TagWriteEnable(SRAMLineWayWriteEnable),
.WriteData(SRAMWriteData), .WriteData(SRAMWriteData),
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .Victim(VictimWay), .Flush(FlushWay), .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .Victim(VictimWay), .Flush(FlushWay),
.save, .restore, .SelFlush, .SelFlush,
.SelectedReadDataLine(ReadDataLineWay), .WayHit, .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), .SelectedReadDataLine(ReadDataLineWay), .WayHit(WayHitRaw), .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay),
.InvalidateAll(InvalidateCacheM)); .InvalidateAll(InvalidateCacheM));
if(NUMWAYS > 1) begin:vict if(NUMWAYS > 1) begin:vict
cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy(
@ -139,10 +141,20 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
// ReadDataLineWay is a 2d array of cache line len by number of ways. // ReadDataLineWay is a 2d array of cache line len by number of ways.
// Need to OR together each way in a bitwise manner. // Need to OR together each way in a bitwise manner.
// Final part of the AO Mux. First is the AND in the cacheway. // Final part of the AO Mux. First is the AND in the cacheway.
or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLine)); or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineRaw));
or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag));
// Because the SRAM read is clocked, the read data may be lost when the IEU is stalled.
// There are two ways to resolve this: 1. replay the read of the SRAM, or 2. save
// the data. Replay is easier but creates a longer critical path.
// save/restore only wayhit and readdata.
// Save registers for the hit vector and the merged read data line.
// Both capture their *Raw input when save is asserted.
// flopenr connections are positional; use the same (clk, reset, enable, d, q)
// order as the other flopenr instances (e.g. cachedirtysavereg, cachevalidsavereg).
// Passing (clk, save, reset, ...) would swap the reset and enable pins.
flopenr #(NUMWAYS) wayhitsavereg(clk, reset, save, WayHitRaw, WayHitSaved);
// The data register has no reset (flopen): (clk, enable, d, q).
flopen #(LINELEN) cachereadsavereg(clk, save, ReadDataLineRaw, ReadDataLineSaved);
// restore selects between the raw and saved {WayHit, ReadDataLine} pair
// (presumably the saved copy when restore is asserted -- confirm against mux2).
mux2 #(NUMWAYS+LINELEN) saverestoremux({WayHitRaw, ReadDataLineRaw}, {WayHitSaved, ReadDataLineSaved},
restore, {WayHit, ReadDataLine});
// Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can
// easily build a variable input mux. // easily build a variable input mux.
// *** move this to LSU and IFU, also remove mux from busdp into LSU. // *** move this to LSU and IFU, also remove mux from busdp into LSU.

View File

@ -51,7 +51,6 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic InvalidateAll, input logic InvalidateAll,
input logic SelFlush, input logic SelFlush,
input logic Flush, input logic Flush,
input logic save, restore,
output logic [LINELEN-1:0] SelectedReadDataLine, output logic [LINELEN-1:0] SelectedReadDataLine,
output logic WayHit, output logic WayHit,
@ -60,10 +59,10 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] ValidBits;
logic [NUMLINES-1:0] DirtyBits; logic [NUMLINES-1:0] DirtyBits;
logic [LINELEN-1:0] ReadDataLine, ReadDataLineRaw, ReadDataLineSaved; logic [LINELEN-1:0] ReadDataLine;
logic [TAGLEN-1:0] ReadTag, ReadTagRaw, ReadTagSaved; logic [TAGLEN-1:0] ReadTag;
logic Valid, ValidRaw, ValidSaved; logic Valid;
logic Dirty, DirtyRaw, DirtySaved; logic Dirty;
logic SelData; logic SelData;
logic SelTag; logic SelTag;
@ -77,7 +76,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
sram1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk(clk), sram1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk(clk),
.Adr(RAdr), .ReadData(ReadTagRaw), .Adr(RAdr), .ReadData(ReadTag),
.WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(TagWriteEnable)); .WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(TagWriteEnable));
// AND portion of distributed tag multiplexer // AND portion of distributed tag multiplexer
@ -93,7 +92,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
genvar words; genvar words;
for(words = 0; words < LINELEN/`XLEN; words++) begin: word for(words = 0; words < LINELEN/`XLEN; words++) begin: word
sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr), sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr),
.ReadData(ReadDataLineRaw[(words+1)*`XLEN-1:words*`XLEN] ), .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ),
.WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]),
.WriteEnable(WriteEnable & WriteWordEnable[words])); .WriteEnable(WriteEnable & WriteWordEnable[words]));
end end
@ -116,7 +115,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD);
flop #(4) ValidCtrlDelayReg(clk, {SetValid, ClearValid, WriteEnable, VDWriteEnable}, flop #(4) ValidCtrlDelayReg(clk, {SetValid, ClearValid, WriteEnable, VDWriteEnable},
{SetValidD, ClearValidD, WriteEnableD, VDWriteEnableD}); {SetValidD, ClearValidD, WriteEnableD, VDWriteEnableD});
assign ValidRaw = ValidBits[RAdrD]; assign Valid = ValidBits[RAdrD];
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
// Dirty Bits // Dirty Bits
@ -130,18 +129,9 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b0; else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b0;
end end
flop #(2) DirtyCtlDelayReg(clk, {SetDirty, ClearDirty}, {SetDirtyD, ClearDirtyD}); flop #(2) DirtyCtlDelayReg(clk, {SetDirty, ClearDirty}, {SetDirtyD, ClearDirtyD});
assign DirtyRaw = DirtyBits[RAdrD]; assign Dirty = DirtyBits[RAdrD];
flopenr #(1) cachedirtysavereg(clk, reset, save, DirtyRaw, DirtySaved);
mux2 #(1) saverestoredirtymux(DirtyRaw, DirtySaved, restore, Dirty);
end else assign Dirty = 1'b0; end else assign Dirty = 1'b0;
// save restore option of handling cpu busy
flopen #(TAGLEN+LINELEN) cachereadsavereg(clk, save, {ReadTagRaw, ReadDataLineRaw}, {ReadTagSaved, ReadDataLineSaved});
flopenr #(1) cachevalidsavereg(clk, reset, save, ValidRaw, ValidSaved);
mux2 #(1+TAGLEN+LINELEN) saverestoremux({ValidRaw, ReadTagRaw, ReadDataLineRaw}, {ValidSaved, ReadTagSaved, ReadDataLineSaved},
restore, {Valid, ReadTag, ReadDataLine});
endmodule endmodule