From 3e916da36e4c96576554e7f28e3f79a272edc103 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 22 Jul 2021 19:42:19 -0500 Subject: [PATCH] Removed the hardware page table walker fault state from the icache so that the icache will only unstall CPU for 1 cycle. In the dcache we added a register to save the load read data in the event an itlb miss occurs concurrently with the load in the memory stage. Under this situation we need to record the load ReadDataM into a temporary register, SavedReadDataM. At this time the CPU is stall; however the walker is going to change the address in the dcache which destroys this data. When leaving the PTW_READY state via a walker instruction fault or ITLB write we select this SavedReadDataM so that the CPU can capture it. --- wally-pipelined/regression/wave.do | 33 ++++++++++++------------ wally-pipelined/src/cache/ICacheCntrl.sv | 7 +---- wally-pipelined/src/cache/dcache.sv | 31 ++++++++++++++++++++-- wally-pipelined/src/lsu/lsu.sv | 8 +++--- 4 files changed, 51 insertions(+), 28 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index b44c2b04a..daecfc921 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,11 +7,12 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/priv/trap/InstrValidM -add wave -noupdate -expand -group {Memory Stage} /testbench/PCtextM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate /testbench/dut/hart/ieu/dp/ResultM +add wave -noupdate /testbench/dut/hart/ieu/dp/ResultW add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM @@ -127,18 +128,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -group alu -divider internals -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -expand -group alu -divider internals +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -314,8 +315,6 @@ add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/AtomicM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/CacheableM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/WriteDataM -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/ReadDataW -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/StallW add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/DCacheStall add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/hart/lsu/dcache/WayHit add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/hart/lsu/dcache/CacheHit @@ -365,7 +364,7 @@ add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress add wave -noupdate /testbench/dut/hart/lsu/hptw/genblk1/PRegEn TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Walk read is wrong} {26824 ns} 1} {{page table setup} {8167 ns} 1} {{eviction at wrong adr} {10128 ns} 1} {{Cursor 6} {41795656 ns} 0} +WaveRestoreCursors {{Walk read is wrong} {26824 ns} 1} {{page table setup} {8167 ns} 1} {{eviction at wrong adr} {10128 ns} 1} {{Cursor 6} {2898 ns} 0} quietly wave cursor active 4 configure wave -namecolwidth 250 configure wave -valuecolwidth 297 @@ -381,4 +380,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {41795482 ns} {41795818 ns} +WaveRestoreZoom {2835 ns} {2995 ns} diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 35851defd..3881e9cb6 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -115,7 +115,6 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam STATE_TLB_MISS = 'h13; localparam STATE_TLB_MISS_DONE = 'h14; - localparam STATE_INSTR_PAGE_FAULT = 'h15; localparam AHBByteLength = `XLEN / 8; @@ -369,7 +368,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) end STATE_TLB_MISS: begin if (WalkerInstrPageFaultF) begin - NextState = STATE_INSTR_PAGE_FAULT; + NextState = STATE_READY; ICacheStallF = 1'b0; end else if (ITLBWriteF) begin NextState = STATE_TLB_MISS_DONE; @@ -380,10 +379,6 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) STATE_TLB_MISS_DONE: begin NextState = STATE_READY; end - STATE_INSTR_PAGE_FAULT: begin - ICacheStallF = 1'b0; - NextState = STATE_READY; - end default: begin PCMux = 2'b01; NextState = STATE_READY; diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 0da202e57..b1edcfa8e 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -43,7 +43,7 @@ module dcache input logic [11:0] VAdr, // when hptw writes dtlb we use this address to index SRAM. input logic [`XLEN-1:0] WriteDataM, - output logic [`XLEN-1:0] ReadDataM, + output logic [`XLEN-1:0] ReadDataM, output logic DCacheStall, output logic CommittedM, output logic DCacheMiss, @@ -60,6 +60,7 @@ module dcache // from ptw input logic SelPTW, input logic WalkerPageFaultM, + output logic [`XLEN-1:0] LSUData, // ahb side output logic [`PA_BITS-1:0] AHBPAdr, // to ahb output logic AHBRead, @@ -147,6 +148,11 @@ module dcache logic SelEvict; logic LRUWriteEn; + + logic CaptureDataM; + logic [`XLEN-1:0] SavedReadDataM; + logic SelSavedReadDataM; + typedef enum {STATE_READY, @@ -331,7 +337,24 @@ module dcache subwordread subwordread(.HRDATA(ReadDataWordMuxM), .HADDRD(MemPAdrM[2:0]), .HSIZED({Funct3M[2], 1'b0, Funct3M[1:0]}), - .HRDATAMasked(ReadDataM)); + .HRDATAMasked(LSUData)); + + assign CaptureDataM = ~SelPTW & MemRWM[1]; + + flopen #(`XLEN) + SavedReadDataReg(.clk, + .en(CaptureDataM), + .d(LSUData), + .q(SavedReadDataM)); + + + mux2 #(`XLEN) + ReadDataMMux(.d0(LSUData), + .d1(SavedReadDataM), + .s(SelSavedReadDataM), + .y(ReadDataM)); + + // This is a confusing point. // The final read data should be updated only if the CPU's StallWtoDCache is low @@ -457,6 +480,7 @@ module dcache DCacheAccess = 1'b0; DCacheMiss = 1'b0; LRUWriteEn = 1'b0; + SelSavedReadDataM = 1'b0; case (CurrState) STATE_READY: begin @@ -659,6 +683,9 @@ module dcache if (ITLBWriteF | WalkerInstrPageFaultF) begin NextState = STATE_READY; + // this signal is gross. It is used to select the saved read data m when the + // CPU was stalled for an itlb miss with a simultaneous load. + SelSavedReadDataM = 1'b1; end // return to ready if page table walk completed. diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 2a46559dd..7afb24995 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -148,8 +148,9 @@ module lsu logic PendingInterruptMtoDCache; logic FlushWtoDCache; logic WalkerPageFaultM; - - + + logic [`XLEN-1:0] LSUData; + hptw hptw( .clk(clk), .reset(reset), @@ -163,7 +164,7 @@ module lsu .PageType, .ITLBWriteF(ITLBWriteF), .DTLBWriteM(DTLBWriteM), - .HPTWReadPTE(ReadDataM), + .HPTWReadPTE(LSUData), .HPTWStall(HPTWStall), .TranslationPAdr, .HPTWRead(HPTWRead), @@ -303,6 +304,7 @@ module lsu .VAdr(MemAdrM[11:0]), .WriteDataM(WriteDataM), .ReadDataM(ReadDataM), + .LSUData(LSUData), .DCacheStall(DCacheStall), .CommittedM(CommittedMfromDCache), .DCacheMiss,