mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
sram1rw cleanup
This commit is contained in:
parent
f9dd79d3e3
commit
97d31cec21
4
pipelined/src/cache/cacheway.sv
vendored
4
pipelined/src/cache/cacheway.sv
vendored
@ -76,7 +76,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
sram1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk(clk),
|
||||
.Addr(RAdr), .ReadData(ReadTag),
|
||||
.Adr(RAdr), .ReadData(ReadTag),
|
||||
.WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(TagWriteEnable));
|
||||
|
||||
// AND portion of distributed tag multiplexer
|
||||
@ -91,7 +91,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
|
||||
// *** Potential optimization: if byte write enables are available, could remove subwordwrites
|
||||
genvar words;
|
||||
for(words = 0; words < LINELEN/`XLEN; words++) begin: word
|
||||
sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Addr(RAdr),
|
||||
sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr),
|
||||
.ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ),
|
||||
.WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]),
|
||||
.WriteEnable(WriteEnable & WriteWordEnable[words]));
|
||||
|
@ -1,90 +0,0 @@
|
||||
Interactions between the dcache and hardware page table walker are complex.
|
||||
In particular the complications arise when a fault occurs concurrently with a memory operation.
|
||||
|
||||
At the beginning of every memory operation there are 8 combinations of three signals:
|
||||
ITLB miss, DTLB miss, and a memory operation. By looking at each combination we
|
||||
can understand exactly the correct sequence of operations and if the operation
|
||||
should continue.
|
||||
|
||||
It is important to note ITLB misses and faults DO NOT flush a memory operation
|
||||
in the memory stage. This is the core reason for the complexity.
|
||||
|
||||
| Type | ITLB miss | DTLB miss | mem op | |
|
||||
|-------+-----------+-----------+--------+--------------|
|
||||
| 0 | 0 | 0 | 0 | |
|
||||
| 1 | 0 | 0 | 1 | |
|
||||
| 2 | 0 | 1 | 0 | Not possible |
|
||||
| 3 | 0 | 1 | 1 | |
|
||||
| 4 | 1 | 0 | 0 | |
|
||||
| 5 | 1 | 0 | 1 | |
|
||||
| 6 | 1 | 1 | 0 | Not possible |
|
||||
| 7 | 1 | 1 | 1 | |
|
||||
|
||||
|
||||
The above table classifies the operations into 8 categories.
|
||||
2 of the 8 are not possible because a DTLB miss implies a memory operation.
|
||||
Each (I/D)TLB miss results in either a write to the corresponding TLB or a TLB fault.
|
||||
To complicate things it is possible to have concurrent ITLB and DTLB misses, which
|
||||
both can result in either a write or a fault. The table below shows the possible
|
||||
scenarios and the sequence of operations.
|
||||
|
||||
|
||||
| Type | action 1 | action 2 | action 3 | keep stall? |
|
||||
|------+------------------+-----------------+-----------------+-------------|
|
||||
| 1 | D$ handles memop | | | Yes |
|
||||
| 3a | DTLB Write | D$ finish memop | | Yes |
|
||||
| 3b | DTLB Fault | Abort memop | | No |
|
||||
| 4a | ITLB write | | | No |
|
||||
| 4b | ITLB Fault | | | No |
|
||||
| 5a | ITLB Write | D$ finish memop | | Yes |
|
||||
| 5b | ITLB Fault | D$ finish memop | | Yes |
|
||||
| 7a | DTLB Write | ITLB write | D$ finish memop | Yes |
|
||||
| 7b | DTLB Write | ITLB Fault | D$ finish memop | Yes |
|
||||
| 7c | DTLB Fault | Abort all | | No |
|
||||
|
||||
Type 1 is a memory operation which either hits in the DTLB or is a physical address. The
|
||||
Dcache handles the operation.
|
||||
|
||||
Type 3a is a memory operation with a DTLB miss. The Dcache enters a special set of states
|
||||
designed to handle the hardware page table walker (HPTW). Secondly the HPTW takes control over the
|
||||
LSU via a set of multiplexors in the LSU Arbiter, driving the Dcache with addresses of the
|
||||
page table. Internally to the HPTW an FSM checks each node of the Page Table and eventually
|
||||
signals either a TLB write or a TLB Fault. In Type 3a the DTLB is written with the leaf
|
||||
page table entry and returns control of the Dcache back to the IEU. Now the Dcache finishes
|
||||
the memory operation using the physical address provided by the TLB. Note it is crucial
|
||||
the dcache replay the memory access into the cache's SRAM memory. As the HPTW sends its
|
||||
requests through the Dcache the original memory operation's SRAM lookup will be lost.
|
||||
|
||||
Type 3b is similar to the 3a type in that it starts with the same conditions; however
|
||||
at the end of the page table walk a fault is detected. Rather than update the TLB the CPU
|
||||
and the dcache need to be informed about the fault and abort the memory operation. Unlike
|
||||
Type 3a the dcache returns directly to STATE_READY and lowers the stall.
|
||||
|
||||
Type 4a is the simplest form of TLB miss as it is an ITLB miss with no memory operation.
|
||||
The Dcache switches in to the special set of page table states and the HPTW takes control
|
||||
of the Dcache. Like with Type 3a the HPTW sends data requests through the Dcache and eventually
|
||||
reads a leaf page table entry (PTE). At this time the HPTW writes the PTE to the ITLB and
|
||||
removes the stall as there is no memory operation to do.
|
||||
|
||||
Type 4b is also an ITLB miss. As with 4a the Dcache switches into page table walker mode and reads
|
||||
until it finds a leaf or in this case a fault. The fault is detected and the Dcache switches back
|
||||
to normal mode.
|
||||
|
||||
Type 5a is a Type 4a with a concurrent memory operation. The Dcache first switches to walker mode.
|
||||
|
||||
Other traps.
|
||||
A new problem has emerged. What happens when an interrupt occurs during a page table walk?
|
||||
The dcache has an output called CommittedM which tells the CPU if the memory operation is
|
||||
committed into the memory system. It would be wrong to pin the interrupt to a memory operation
|
||||
when it is already or partially committed to the memory system. Instead the next instruction
|
||||
has to be pinned to the interrupt. The complexity occurs with the ITLB miss; types 4, 5 and 7.
|
||||
|
||||
Type 4: The ITLB misses and starts using the dcache to fetch the page table. There is no memory
|
||||
operation. Depending on how far the walk has progressed, the operation can be aborted. If the TLB is not yet
|
||||
updated then the walk could be aborted. However if the TLB is updated then the interrupt must be
|
||||
delayed until the next instruction.
|
||||
|
||||
What is the meaning of CommittedM?
|
||||
This signal informs the CPU if a memory operation is not started or if it is between started
|
||||
and done. Once a memory op is started it should not be interrupted. This is used to prevent the
|
||||
CPU from generating an interrupt after the operation is partially or completely done.
|
35
pipelined/src/cache/sram1rw.sv
vendored
35
pipelined/src/cache/sram1rw.sv
vendored
@ -34,25 +34,21 @@
|
||||
// WIDTH is number of bits in one "word" of the memory, DEPTH is number of such words
|
||||
|
||||
module sram1rw #(parameter DEPTH=128, WIDTH=256) (
|
||||
input logic clk,
|
||||
// port 1 is read only
|
||||
input logic [$clog2(DEPTH)-1:0] Addr,
|
||||
output logic [WIDTH-1:0] ReadData,
|
||||
|
||||
// port 2 is write only
|
||||
input logic [WIDTH-1:0] WriteData,
|
||||
input logic WriteEnable
|
||||
);
|
||||
input logic clk,
|
||||
input logic [$clog2(DEPTH)-1:0] Adr,
|
||||
input logic [WIDTH-1:0] WriteData,
|
||||
input logic WriteEnable,
|
||||
output logic [WIDTH-1:0] ReadData);
|
||||
|
||||
logic [DEPTH-1:0][WIDTH-1:0] StoredData; // *** inconsistency in packed vs. unpacked
|
||||
logic [$clog2(DEPTH)-1:0] AddrD;
|
||||
logic [WIDTH-1:0] WriteDataD;
|
||||
logic WriteEnableD;
|
||||
|
||||
logic [WIDTH-1:0] StoredData[DEPTH-1:0];
|
||||
logic [$clog2(DEPTH)-1:0] AddrD;
|
||||
logic [WIDTH-1:0] WriteDataD;
|
||||
logic WriteEnableD;
|
||||
|
||||
//*** model as single port
|
||||
always_ff @(posedge clk) begin
|
||||
AddrD <= Addr;
|
||||
WriteDataD <= WriteData; /// ****** this is not right. there should not need to be a delay.
|
||||
AddrD <= Adr;
|
||||
WriteDataD <= WriteData; /// ****** this is not right. there should not need to be a delay. Implement alternative cache stall to avoid this. Eliminates a bunch of delay flops elsewhere
|
||||
WriteEnableD <= WriteEnable;
|
||||
if (WriteEnableD) begin
|
||||
StoredData[AddrD] <= #1 WriteDataD;
|
||||
@ -60,7 +56,12 @@ module sram1rw #(parameter DEPTH=128, WIDTH=256) (
|
||||
end
|
||||
|
||||
assign ReadData = StoredData[AddrD];
|
||||
|
||||
/*
|
||||
always_ff @(posedge clk) begin
|
||||
ReadData <= RAM[Adr];
|
||||
if (WriteEnable) RAM[Adr] <= WriteData;
|
||||
end
|
||||
*/
|
||||
endmodule
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user