From 3e916da36e4c96576554e7f28e3f79a272edc103 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 22 Jul 2021 19:42:19 -0500 Subject: [PATCH 1/4] Removed the hardware page table walker fault state from the icache so that the icache will only unstall CPU for 1 cycle. In the dcache we added a register to save the load read data in the event an itlb miss occurs concurrently with the load in the memory stage. Under this situation we need to record the load ReadDataM into a temporary register, SavedReadDataM. At this time the CPU is stalled; however the walker is going to change the address in the dcache which destroys this data. When leaving the PTW_READY state via a walker instruction fault or ITLB write we select this SavedReadDataM so that the CPU can capture it. --- wally-pipelined/regression/wave.do | 33 ++++++++++++------------ wally-pipelined/src/cache/ICacheCntrl.sv | 7 +---- wally-pipelined/src/cache/dcache.sv | 31 ++++++++++++++++++++-- wally-pipelined/src/lsu/lsu.sv | 8 +++--- 4 files changed, 51 insertions(+), 28 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index b44c2b04..daecfc92 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,11 +7,12 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/priv/trap/InstrValidM -add wave -noupdate -expand -group {Memory Stage} /testbench/PCtextM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate
/testbench/dut/hart/ieu/dp/ResultM +add wave -noupdate /testbench/dut/hart/ieu/dp/ResultW add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM @@ -127,18 +128,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -group alu -divider internals -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -expand -group alu 
-divider internals +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -314,8 +315,6 @@ add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/AtomicM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/CacheableM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/WriteDataM -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/ReadDataW -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/StallW add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/DCacheStall add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/hart/lsu/dcache/WayHit add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/hart/lsu/dcache/CacheHit @@ -365,7 +364,7 @@ add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress add wave -noupdate 
/testbench/dut/hart/lsu/hptw/genblk1/PRegEn TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Walk read is wrong} {26824 ns} 1} {{page table setup} {8167 ns} 1} {{eviction at wrong adr} {10128 ns} 1} {{Cursor 6} {41795656 ns} 0} +WaveRestoreCursors {{Walk read is wrong} {26824 ns} 1} {{page table setup} {8167 ns} 1} {{eviction at wrong adr} {10128 ns} 1} {{Cursor 6} {2898 ns} 0} quietly wave cursor active 4 configure wave -namecolwidth 250 configure wave -valuecolwidth 297 @@ -381,4 +380,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {41795482 ns} {41795818 ns} +WaveRestoreZoom {2835 ns} {2995 ns} diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 35851def..3881e9cb 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -115,7 +115,6 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address? 
localparam STATE_TLB_MISS = 'h13; localparam STATE_TLB_MISS_DONE = 'h14; - localparam STATE_INSTR_PAGE_FAULT = 'h15; localparam AHBByteLength = `XLEN / 8; @@ -369,7 +368,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) end STATE_TLB_MISS: begin if (WalkerInstrPageFaultF) begin - NextState = STATE_INSTR_PAGE_FAULT; + NextState = STATE_READY; ICacheStallF = 1'b0; end else if (ITLBWriteF) begin NextState = STATE_TLB_MISS_DONE; @@ -380,10 +379,6 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) STATE_TLB_MISS_DONE: begin NextState = STATE_READY; end - STATE_INSTR_PAGE_FAULT: begin - ICacheStallF = 1'b0; - NextState = STATE_READY; - end default: begin PCMux = 2'b01; NextState = STATE_READY; diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 0da202e5..b1edcfa8 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -43,7 +43,7 @@ module dcache input logic [11:0] VAdr, // when hptw writes dtlb we use this address to index SRAM. 
input logic [`XLEN-1:0] WriteDataM, - output logic [`XLEN-1:0] ReadDataM, + output logic [`XLEN-1:0] ReadDataM, output logic DCacheStall, output logic CommittedM, output logic DCacheMiss, @@ -60,6 +60,7 @@ module dcache // from ptw input logic SelPTW, input logic WalkerPageFaultM, + output logic [`XLEN-1:0] LSUData, // ahb side output logic [`PA_BITS-1:0] AHBPAdr, // to ahb output logic AHBRead, @@ -147,6 +148,11 @@ module dcache logic SelEvict; logic LRUWriteEn; + + logic CaptureDataM; + logic [`XLEN-1:0] SavedReadDataM; + logic SelSavedReadDataM; + typedef enum {STATE_READY, @@ -331,7 +337,24 @@ module dcache subwordread subwordread(.HRDATA(ReadDataWordMuxM), .HADDRD(MemPAdrM[2:0]), .HSIZED({Funct3M[2], 1'b0, Funct3M[1:0]}), - .HRDATAMasked(ReadDataM)); + .HRDATAMasked(LSUData)); + + assign CaptureDataM = ~SelPTW & MemRWM[1]; + + flopen #(`XLEN) + SavedReadDataReg(.clk, + .en(CaptureDataM), + .d(LSUData), + .q(SavedReadDataM)); + + + mux2 #(`XLEN) + ReadDataMMux(.d0(LSUData), + .d1(SavedReadDataM), + .s(SelSavedReadDataM), + .y(ReadDataM)); + + // This is a confusing point. // The final read data should be updated only if the CPU's StallWtoDCache is low @@ -457,6 +480,7 @@ module dcache DCacheAccess = 1'b0; DCacheMiss = 1'b0; LRUWriteEn = 1'b0; + SelSavedReadDataM = 1'b0; case (CurrState) STATE_READY: begin @@ -659,6 +683,9 @@ module dcache if (ITLBWriteF | WalkerInstrPageFaultF) begin NextState = STATE_READY; + // this signal is gross. It is used to select the saved read data m when the + // CPU was stalled for an itlb miss with a simultaneous load. + SelSavedReadDataM = 1'b1; end // return to ready if page table walk completed. 
diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 2a46559d..7afb2499 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -148,8 +148,9 @@ module lsu logic PendingInterruptMtoDCache; logic FlushWtoDCache; logic WalkerPageFaultM; - - + + logic [`XLEN-1:0] LSUData; + hptw hptw( .clk(clk), .reset(reset), @@ -163,7 +164,7 @@ module lsu .PageType, .ITLBWriteF(ITLBWriteF), .DTLBWriteM(DTLBWriteM), - .HPTWReadPTE(ReadDataM), + .HPTWReadPTE(LSUData), .HPTWStall(HPTWStall), .TranslationPAdr, .HPTWRead(HPTWRead), @@ -303,6 +304,7 @@ module lsu .VAdr(MemAdrM[11:0]), .WriteDataM(WriteDataM), .ReadDataM(ReadDataM), + .LSUData(LSUData), .DCacheStall(DCacheStall), .CommittedM(CommittedMfromDCache), .DCacheMiss, From 5d2b30e33294531f19d4e61b2787e444ea8772c8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 23 Jul 2021 08:11:15 -0400 Subject: [PATCH 2/4] Removed LEVELx states from HPTW --- wally-pipelined/src/mmu/hptw.sv | 92 ++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index b3bff0ea..845f495d 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -65,10 +65,10 @@ module hptw logic [`XLEN-1:0] TranslationVAdr; - typedef enum {LEVEL0_SET_ADR, LEVEL0_READ, LEVEL0, - LEVEL1_SET_ADR, LEVEL1_READ, LEVEL1, - LEVEL2_SET_ADR, LEVEL2_READ, LEVEL2, - LEVEL3_SET_ADR, LEVEL3_READ, LEVEL3, + typedef enum {L0_ADR, L0_RD, + L1_ADR, L1_RD, + L2_ADR, L2_RD, + L3_ADR, L3_RD, LEAF, IDLE, FAULT} statetype; statetype WalkerState, NextWalkerState, InitialWalkerState; @@ -97,7 +97,7 @@ module hptw // Enable and select signals based on states assign StartWalk = (WalkerState == IDLE) & TLBMiss; - assign HPTWRead = (WalkerState == LEVEL3_READ) | (WalkerState == LEVEL2_READ) | (WalkerState == LEVEL1_READ) | (WalkerState == LEVEL0_READ); + assign HPTWRead = (WalkerState == L3_RD) | (WalkerState 
== L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); assign SelPTW = (WalkerState != IDLE) & (WalkerState != FAULT); assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk; assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk; @@ -111,10 +111,10 @@ module hptw flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); always_comb case (WalkerState) - LEVEL3: NextPageType = 2'b11; // terapage - LEVEL2: NextPageType = 2'b10; // gigapage - LEVEL1: NextPageType = 2'b01; // megapage - LEVEL0: NextPageType = 2'b00; // kilopage + L3_RD: NextPageType = 2'b11; // terapage + L2_RD: NextPageType = 2'b10; // gigapage + L1_RD: NextPageType = 2'b01; // megapage + L0_RD: NextPageType = 2'b00; // kilopage default: NextPageType = PageType; endcase @@ -122,36 +122,36 @@ module hptw if (`XLEN==32) begin // RV32 logic [9:0] VPN; logic [`PPN_BITS-1:0] PPN; - assign VPN = ((WalkerState == LEVEL1_SET_ADR) | (WalkerState == LEVEL1_READ)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state - assign PPN = ((WalkerState == LEVEL1_SET_ADR) | (WalkerState == LEVEL1_READ)) ? BasePageTablePPN : CurrentPPN; + assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state + assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? 
BasePageTablePPN : CurrentPPN; assign TranslationPAdr = {PPN, VPN, 2'b00}; end else begin // RV64 logic [8:0] VPN; logic [`PPN_BITS-1:0] PPN; always_comb case (WalkerState) // select VPN field based on HPTW state - LEVEL3_SET_ADR, LEVEL3_READ: VPN = TranslationVAdr[47:39]; - LEVEL3, LEVEL2_SET_ADR, LEVEL2_READ: VPN = TranslationVAdr[38:30]; - LEVEL2, LEVEL1_SET_ADR, LEVEL1_READ: VPN = TranslationVAdr[29:21]; + L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; + L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; + L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; default: VPN = TranslationVAdr[20:12]; endcase - assign PPN = ((WalkerState == LEVEL3_SET_ADR) | (WalkerState == LEVEL3_READ) | - (SvMode != `SV48 & ((WalkerState == LEVEL2_SET_ADR) | (WalkerState == LEVEL2_READ)))) ? BasePageTablePPN : CurrentPPN; + assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | + (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; assign TranslationPAdr = {PPN, VPN, 3'b000}; end // Initial state and misalignment for RV32/64 if (`XLEN == 32) begin - assign InitialWalkerState = LEVEL1_SET_ADR; + assign InitialWalkerState = L1_ADR; assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0 - assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned); + assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned); end else begin logic GigapageMisaligned, TerapageMisaligned; - assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR; + assign InitialWalkerState = (SvMode == `SV48) ? 
L3_ADR : L2_ADR; assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0 assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0 assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0 - assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned); + assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned); end // Page Table Walker FSM @@ -164,29 +164,37 @@ module hptw case (WalkerState) IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; else NextWalkerState = IDLE; - LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ; - LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ; - else NextWalkerState = LEVEL3; - LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; - else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR; + L3_ADR: NextWalkerState = L3_RD; // first access in SV48 + L3_RD: if (HPTWStall) NextWalkerState = L3_RD; + else NextWalkerState = L2_ADR; +// LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; +// else if (ValidNonLeafPTE) NextWalkerState = L2_ADR; +// else NextWalkerState = FAULT; + L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39 + else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages + else if (ValidNonLeafPTE) NextWalkerState = L2_RD; + else NextWalkerState = FAULT; + L2_RD: if (HPTWStall) NextWalkerState = L2_RD; + else NextWalkerState = L1_ADR; +// LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; +// else if (ValidNonLeafPTE) NextWalkerState = L1_ADR; +// else NextWalkerState = FAULT; + L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32 + else if (ValidLeafPTE && 
~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages + else if (ValidNonLeafPTE) NextWalkerState = L1_RD; + else NextWalkerState = FAULT; + L1_RD: if (HPTWStall) NextWalkerState = L1_RD; + else NextWalkerState = L0_ADR; +// LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; +// else if (ValidNonLeafPTE) NextWalkerState = L0_ADR; +// else NextWalkerState = FAULT; + L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages + else if (ValidNonLeafPTE) NextWalkerState = L0_RD; else NextWalkerState = FAULT; - LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ; - LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ; - else NextWalkerState = LEVEL2; - LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; - else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR; - else NextWalkerState = FAULT; - LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ; - LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ; - else NextWalkerState = LEVEL1; - LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; - else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR; - else NextWalkerState = FAULT; - LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ; - LEVEL0_READ: if (HPTWStall) NextWalkerState = LEVEL0_READ; - else NextWalkerState = LEVEL0; - LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF; - else NextWalkerState = FAULT; + L0_RD: if (HPTWStall) NextWalkerState = L0_RD; + else NextWalkerState = LEAF; +// LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF; +// else NextWalkerState = FAULT; LEAF: NextWalkerState = IDLE; FAULT: NextWalkerState = IDLE; default: begin From f3579032bdd3e812be5ec94716f37d840f28437f Mon Sep 17 00:00:00 2001 From: kipmacsaigoren Date: Fri, 23 Jul 2021 11:57:58 -0500 Subject: [PATCH 3/4] Cleaned up priority thermometer verilog. 
passes regression, ideally shortens critical path through PMPs --- wally-pipelined/src/mmu/hptw.sv | 12 ++--- wally-pipelined/src/mmu/pmpadrdec.sv | 23 +++------ wally-pipelined/src/mmu/pmpchecker.sv | 2 +- .../{prioritycircuit.sv => priorityonehot.sv} | 21 +++----- .../src/mmu/prioritythermometer.sv | 50 +++++++++++++++++++ wally-pipelined/src/mmu/tlblru.sv | 2 +- 6 files changed, 70 insertions(+), 40 deletions(-) rename wally-pipelined/src/mmu/{prioritycircuit.sv => priorityonehot.sv} (78%) create mode 100644 wally-pipelined/src/mmu/prioritythermometer.sv diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index 845f495d..fd7cee43 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -48,6 +48,12 @@ module hptw output logic WalkerInstrPageFaultF, WalkerLoadPageFaultM,WalkerStorePageFaultM // faults ); + typedef enum {L0_ADR, L0_RD, + L1_ADR, L1_RD, + L2_ADR, L2_RD, + L3_ADR, L3_RD, + LEAF, IDLE, FAULT} statetype; // *** placed outside generate statement to remove synthesis errors + generate if (`MEM_VIRTMEM) begin logic DTLBWalk; // register TLBs translation miss requests @@ -64,12 +70,6 @@ module hptw logic [`SVMODE_BITS-1:0] SvMode; logic [`XLEN-1:0] TranslationVAdr; - - typedef enum {L0_ADR, L0_RD, - L1_ADR, L1_RD, - L2_ADR, L2_RD, - L3_ADR, L3_RD, - LEAF, IDLE, FAULT} statetype; statetype WalkerState, NextWalkerState, InitialWalkerState; // Extract bits from CSRs and inputs diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 0fe2b2d7..61816782 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -67,9 +67,7 @@ module pmpadrdec ( assign TORMatch = PAgePMPAdrIn && PAltPMPAdr; // Naturally aligned regions - - // verilator lint_off UNOPTFLAT - logic [`PA_BITS-1:0] Mask; + logic [`PA_BITS-1:0] NAMask; //genvar i; // create a mask of which bits to ignore @@ -80,23 +78,14 @@ module pmpadrdec ( // assign Mask[i] =
Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore // end // endgenerate - prioritycircuit #(.ENTRIES(`PA_BITS-2), .FINAL_OP("NONE")) maskgen(.a(~PMPAdr[`PA_BITS-3:0]), .FirstPin(AdrMode==NAPOT), .y(Mask[`PA_BITS-1:2])); - assign Mask[1:0] = 2'b11; - // *** possible experiments: - /* PA < PMP addr could be in its own module, - preeserving hierarchy so we can know if this is the culprit on the critical path - Should take logarthmic time, so more like 6 levels than 40 should be expected + assign NAMask[1:0] = {2'b11}; - update mask generation - Should be concurrent with the subtraction/comparison - if one is the critical path, the other shouldn't be which makes us think the mask generation is the culprit. + prioritythemometer #(`PA_BITS-2) namaskgen( + .a({PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}), + .y(NAMask[`PA_BITS-1:2])); - Hopefully just use the priority circuit here - */ - // verilator lint_on UNOPTFLAT - - assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask); + assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | NAMask); assign Match = (AdrMode == TOR) ? TORMatch : (AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch : diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index eac4cc47..7dc37163 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -69,7 +69,7 @@ module pmpchecker ( .PAgePMPAdrOut(PAgePMPAdr), .FirstMatch, .Match, .Active, .L, .X, .W, .R); - prioritycircuit #(.ENTRIES(`PMP_ENTRIES), .FINAL_OP("AND")) pmppriority(.a(Match), .FirstPin(1'b1), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit. + priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit. 
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; diff --git a/wally-pipelined/src/mmu/prioritycircuit.sv b/wally-pipelined/src/mmu/priorityonehot.sv similarity index 78% rename from wally-pipelined/src/mmu/prioritycircuit.sv rename to wally-pipelined/src/mmu/priorityonehot.sv index df44b35f..75825dc4 100644 --- a/wally-pipelined/src/mmu/prioritycircuit.sv +++ b/wally-pipelined/src/mmu/priorityonehot.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// prioritycircuit.sv +// priorityonehot.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021 // Modified: Teo Ene 15 Apr 2021: @@ -30,31 +30,22 @@ `include "wally-config.vh" -module prioritycircuit #(parameter ENTRIES = 8, - parameter FINAL_OP = "AND") ( +module priorityonehot #(parameter ENTRIES = 8) ( input logic [ENTRIES-1:0] a, - input logic FirstPin, output logic [ENTRIES-1:0] y ); - // verilator lint_off UNOPTFLAT + logic [ENTRIES-1:0] nolower; // generate thermometer code mask genvar i; generate - assign nolower[0] = FirstPin; + assign nolower[0] = 1'b1; for (i=1; i Date: Fri, 23 Jul 2021 14:00:44 -0400 Subject: [PATCH 4/4] testbench workaround for QEMU's SSTATUS XLEN bits --- wally-pipelined/testbench/testbench-linux.sv | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 79827c4f..64b0483d 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,7 +27,7 @@ module testbench(); - parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3160000; // # of instructions at which to turn on waves in graphical sim + parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3080000; // # of instructions at which to turn on waves in graphical sim parameter stopICount = 
`BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can) /////////////////////////////////////////////////////////////////////////////// @@ -184,9 +184,12 @@ module testbench(); scan_file_rf = $fscanf(data_file_rf, "%d\n", regNumExpected); scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected); force dut.hart.ieu.dp.regf.wd3 = regExpected; - // Hack to compensate for QEMU's incorrect MSTATUS + // Hack to compensate for QEMU's incorrect MSTATUS (Wally correctly identifies MXL, SXL to be 2 whereas QEMU sets them to an invalid value of 0 end else if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,16) == "mstatus") begin force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'ha00000000; + // Hack to compensate for QEMU's incorrect SSTATUS (Wally correctly identifies UXL to be 2 whereas QEMU sets it to an invalid value of 0 + end else if (PCtextW.substr(0,3) == "csrr" && ((PCtextW.substr(10,16) == "sstatus") || (PCtextW.substr(11,17) == "sstatus"))) begin + force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'h200000000; end else release dut.hart.ieu.dp.regf.wd3; // Hack to compensate for QEMU's correct but different MTVAL (according to spec, storing the faulting instr is an optional feature) if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,14) == "mtval") begin @@ -265,7 +268,7 @@ module testbench(); // Check PCD, InstrD if (~PCDwrong && ~(dut.hart.ifu.PCD === PCDexpected)) begin - $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected); + $display("%0t ps, instr %0d: PCD does not equal PCD expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected); `ERROR end InstrMask = InstrDExpected[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF;